]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
cobol: Support National characters and Unicode runtime encoding.
authorRobert Dubner <rdubner@symas.com>
Fri, 16 Jan 2026 20:42:50 +0000 (15:42 -0500)
committerRobert Dubner <rdubner@symas.com>
Sat, 17 Jan 2026 03:09:57 +0000 (22:09 -0500)
The last few months have seen an evolution in the COBOL compiler.  Up
until now it could use either CP1252/ASCII or CP1140/EBCDIC to represent
alphanumeric variables and numeric types that are stored as character
strings.  With these changes, those types can be represented in many
other single-byte encodings, as well as UTF16 and UTF32 encodings.

These changes required extensive changes.

1) The initial parsing has to handle the extended capabilities.

2) Each run-time variable designates its character set.

3) The run-time code has to be able to handle wide characters.

Since the development took place over a period of time, other changes
crept in. In particular, there is an expansion of bindings making
certain POSIX functions available to the COBOL programmer.

There has also been an expansion of gcobol's use of the GCC diagnostic
framework.

Co-Authored-By: Robert Dubner <rdubner@symas.com>
Co-Authored-By: James K. Lowden <jklowden@cobolworx.com>
gcc/cobol/ChangeLog:

* cbldiag.h (struct cbl_loc_t): Diagnostics.
(enum cbl_diag_id_t): Diagnostics.
* cdf.y: Includes.
* cobol1.cc (cobol_warning_suppress): Diagnostics.
(cobol_langhook_handle_option): Implement -fexec-charset.  Expand
the use of diagnostics.
* gcobc: Expand options and warnings.
* gcobol.1: Documentation.
* genapi.cc (level_88_helper): Charsets.
(get_level_88_domain): Charsets.
(get_class_condition_string): Charsets.
(function_pointer_from_name): Charsets.
(initialize_variable_internal):  Charsets.
(parser_initialize): Charsets.
(get_binary_value_from_float): Charsets.
(get_bytes_needed): Charsets.
(cobol_compare): Charsets.
(move_tree): Eliminate function.
(move_tree_to_field): Eliminate function.
(get_string_from): Eliminate function.
(parser_init_list): Charsets.
(psa_FldLiteralN): Charsets.
(parser_accept_date_yymmdd): Charsets.
(parser_accept_date_yyyymmdd): Charsets.
(parser_accept_date_yyddd): Charsets.
(parser_accept_date_yyyyddd): Charsets.
(parser_accept_date_dow): Charsets.
(parser_accept_date_hhmmssff): Charsets.
(parser_alphabet): Charsets.
(parser_alphabet_use): Charsets.
(parser_display_internal): Charsets.
(get_literalN_value): Charsets.
(tree_type_from_field_type): Charsets.
(program_end_stuff): Charsets.
(walk_initialization): Charsets.
(parser_xml_parse): Charsets.
(initialize_the_data): Charsets.
(establish_using): Charsets.
(parser_setop): Charsets.
(parser_set_conditional88): Charsets.
(parser_file_add): Charsets.
(get_the_filename): Eliminate function.
(parser_file_open): Charsets.
(parser_file_delete_file): Charsets.
(parser_file_start): Charsets.
(parser_module_name): Charsets.
(parser_intrinsic_find_string): New function.
(parser_intrinsic_numval_c): Charsets.
(parser_intrinsic_convert): New function.
(parser_intrinsic_call_1): Charsets.
(create_and_call): Charsets.
(mh_identical): Charsets.
(mh_source_is_literalN): Charsets.
(float_type_of): Charsets.
(mh_dest_is_float): Charsets.
(mh_numeric_display): Charsets.
(mh_little_endian): Charsets.
(mh_source_is_group): Charsets.
(mh_source_is_literalA): Charsets.
(move_helper): Charsets.
(binary_initial): Eliminate function.
(digits_from_int128): Eliminate function.
(digits_from_float128): Eliminate function.
(initial_from_initial):  Eliminate function.
(convert_data_initial): New function.
(actually_create_the_static_field): Charsets.
(psa_new_var_decl): Charsets.
(psa_FldLiteralA): Charsets.
(parser_local_add): Charsets.
(parser_symbol_add): Charsets.
* genapi.h (parser_intrinsic_convert): New function.
(parser_intrinsic_find_string): New function.
* genmath.cc (arithmetic_operation): Charsets.
(largest_binary_term): Charsets.
(fast_add): Charsets.
(fast_subtract): Charsets.
(fast_multiply): Charsets.
(fast_divide): Charsets.
(parser_subtract): Fix subtract float from float.
* genutil.cc (get_any_capacity): Charsets.
(get_and_check_refstart_and_reflen): Charsets.
(get_data_offset): Charsets.
(get_binary_value): Charsets.
(tree_type_from_field): Charsets.
(copy_little_endian_into_place): Charsets.
(get_literal_string): Charsets.
(refer_is_clean): Charsets.
(refer_fill_depends): Charsets.
(refer_size_source): Comment.
* lang-specs.h: Charsets.
* lang.opt: Charsets.
* lexio.cc (parse_copy_directive): Diagnostics.
* messages.cc (cbl_diagnostic_kind): Diagnostics.
(cobol_warning_suppress): Diagnostics.
* parse.y: Many changes for charsets and diagnostics.
* parse_ante.h (MAXLENGTH_FORMATTED_DATE): Charsets.
(MAXLENGTH_FORMATTED_TIME): Charsets.
(MAXLENGTH_CALENDAR_DATE): Charsets.
(MAXLENGTH_FORMATTED_DATETIME): Charsets.
(consistent_encoding_check): Charsets.
(enum data_clause_t): Charsets.
(new_alphanumeric): Charsets.
(name_of): Charsets.
(class eval_subject_t): Charsets.
(struct domain_t): Charsets.
(struct file_list_t): Charsets.
(current_encoding): Charsets.
(new_tempnumeric): Charsets.
(is_integer_literal): Charsets.
(new_literal): Charsets.
(new_constant): Charsets.
(conditional_set): Charsets.
(field_find): Charsets.
(valid_redefine): Charsets.
(field_value_all): Charsets.
(parent_has_picture): Charsets.
(parent_has_value): Charsets.
(blank_pad_initial): Charsets.
(blankit): Charsets.
(cbl_field_t::blank_initial): Charsets.
(value_encoding_check): Charsets.
(cbl_field_t::set_initial): Charsets.
(field_alloc): Charsets.
(parser_move_carefully): Charsets.
(data_division_ready): Charsets.
(anybody_redefines): Charsets.
(procedure_division_ready): Charsets.
(file_section_parent_set): Charsets.
(field_binary_usage): Charsets.
(goodnight_gracie): Formatting.
* scan.l: Charsets.
* scan_ante.h (numstr_of): Charsets.
(typed_name): Charsets.
* show_parse.h: Charsets.
* structs.cc (create_cblc_file_t): Charsets.
* symbols.cc (symbol_table_extend): Charsets.
(WARNING_FIELD): Diagnostics.
(constq): Charsets.
(elementize): Charsets.
(field_size): Charsets.
(cbl_field_t::set_attr): Eliminate run-time component.
(cbl_field_t::clear_attr): Eliminate run-time component.
(field_memsize): Charsets.
(cbl_encoding_str): Charsets.
(symbols_dump): Charsets.
(is_variable_length): Formatting.
(field_str): Charsets.
(extend_66_capacity): Charsets.
(operator<<): Charsets.
(symbols_update): Charsets.
(symbol_field_parent_set): Charsets.
(symbol_table_init): Charsets.
(numeric_group_attrs): Charsets.
(symbol_field_add): Charsets.
(symbol_field_alias): Charsets.
(fd_record_size_cmp): Charsets.
(symbol_file_record_sizes): Charsets.
(cbl_alphabet_t::reencode): Charsets.
(symbol_temporary_location): Charsets.
(new_literal_2): Charsets.
(new_alphanumeric): Charsets.
(standard_internal): Charsets.
(cbl_field_t::codeset_t::stride): Charsets.
(cobol_alpha_encoding): Charsets.
(cobol_national_encoding): Charsets.
(new_temporary): Charsets.
(new_literal_float): Charsets.
(cbl_field_t::is_ascii): Charsets.
(cbl_field_t::internalize): Eliminate function.
(cbl_field_t::source_code_check): Charsets.
(iconv_cd): Charsets.
(cbl_field_t::encode): New function for charsets.
(cbl_field_t::set_capacity): Charsets.
(cbl_field_t::add_capacity): Charsets.
(cbl_field_t::char_capacity): Charsets.
(symbol_label_section_exists): Charsets.
(size): Charsets.
(validate_numeric_edited): Charsets.
* symbols.h (cobol_alpha_encoding): Charsets.
(cobol_national_encoding): Charsets.
(consistent_encoding_check): Charsets.
(class cbl_domain_elem_t): Charsets.
(struct cbl_domain_t): Charsets.
(struct cbl_field_data_t): Charsets.
(class cbl_field_data_t): Charsets.
(struct cbl_subtable_t): Charsets.
(struct cbl_field_t): Charsets.
(new_literal_float): Charsets.
(new_temporary): Charsets.
(new_literal_2): Charsets.
(symbol_temporary_location): Charsets.
(class temporaries_t): Charsets.
(struct symbol_elem_t): Charsets.
(symbol_elem_of): Charsets.
(symbol_unique_index): Charsets.
(cbl_field_type_name): Charsets.
(validate_numeric_edited): Charsets.
* token_names.h: Charsets.
* util.cc (cdf_literalize): Charsets.
(cbl_field_type_name): Charsets.
(determine_intermediate_type): Charsets.
(is_alpha_edited): Charsets.
(cbl_field_data_t::is_alpha_edited): Charsets.
(symbol_field_type_update): Charsets.
(redefine_field): Charsets.
(FIXED_WIDE_INT): Charsets.
(dirty_to_binary): Charsets.
(digits_from_int128): Charsets.
(binary_initial): Charsets.
(cbl_field_t::encode_numeric): Charsets.
(FOR_JIM): Temporary conditional demonstration code.
(parse_error_inc): Diagnostics.
(parse_error_count): Diagnostics.
(cbl_field_t::report_invalid_initial_value): Diagnostics.
(valid_move): Diagnostics.
(type_capacity): Charsets.
(symbol_unique_index): New function.
(cbl_unimplementedw): Formatting.

libgcobol/ChangeLog:

* charmaps.cc (__gg__encoding_iconv_name): Charsets.
(__gg__encoding_iconv_valid): Charsets.
(__gg__encoding_iconv_type): Charsets.
(encoding_descr): Charsets.
(__gg__encoding_iconv_descr): Charsets.
(__gg__iconverter): Charsets.
(__gg__miconverter): Charsets.
* charmaps.h (NOT_A_CHARACTER): Charsets.
(ascii_nul): Charsets.
(ascii_bang): Charsets.
(__gg__encoding_iconv_type): Charsets.
(__gg__iconverter): Charsets.
(__gg__miconverter): Charsets.
(DEFAULT_32_ENCODING): Charsets.
(class charmap_t): Charsets.
(__gg__get_charmap): Charsets.
* common-defs.h (enum cbl_field_attr_t):
(enum cbl_figconst_t): Formatting.
(LOW_VALUE_E): Handle enum arithmetic.
(ZERO_VALUE_E): Handle enum arithmetic.
(SPACE_VALUE_E): Handle enum arithmetic.
(QUOTE_VALUE_E): Handle enum arithmetic.
(HIGH_VALUE_E): Handle enum arithmetic.
(enum convert_type_t): Enum for new FUNCTION CONVERT.
(struct cbl_declarative_t): Formatting.
* encodings.h (struct encodings_t): Charsets.
* gcobolio.h: Charsets.
* gfileio.cc (get_filename): Rename to establish filename.
(establish_filename): Renamed from get_filename.
(relative_file_delete):  Charsets.
(__io__file_remove): Moved.
(trim_in_place): Charsets.
(relative_file_start): Charsets.
(relative_file_rewrite): Charsets.
(relative_file_write): Charsets.
(sequential_file_write): Charsets.
(line_sequential_file_read): Charsets.
(sequential_file_read): Charsets.
(relative_file_read): Charsets.
(__gg__file_reopen): Charsets.
(__io__file_open): Charsets.
(__io__file_close): Charsets.
(gcobol_fileops): Charsets.
(__gg__file_open): Charsets.
(__gg__file_remove): Charsets.
* gfileio.h (__gg__file_open): Charsets.
* gmath.cc (__gg__subtractf1_float_phase2): Comment.
(__gg__subtractf2_float_phase1): Comment.
(__gg__multiplyf1_phase2): Comment.
* intrinsic.cc (is_zulu_format): Charsets.
(string_to_dest): Charsets.
(get_all_time): Charsets.
(ftime_replace): Charsets.
(__gg__char): Charsets.
(__gg__current_date): Charsets.
(__gg__formatted_current_date): Charsets.
(__gg__formatted_date): Charsets.
(__gg__formatted_datetime): Charsets.
(__gg__formatted_time): Charsets.
(change_case): Charsets.
(__gg__upper_case): Charsets.
(numval): Charsets.
(numval_c): Charsets.
(__gg__trim): Charsets.
(__gg__reverse): Charsets.
(fill_cobol_tm): Charsets.
(__gg__seconds_from_formatted_time): Charsets.
(__gg__hex_of): Charsets.
(__gg__numval_f): Charsets.
(__gg__test_numval_f): Charsets.
(__gg__locale_date): Charsets.
(__gg__locale_time): Charsets.
(__gg__locale_time_from_seconds): Charsets.
* libgcobol.cc (NO_RDIGITS): Alias for (0).
(__gg__move): Forward reference.
(struct program_state): Charsets.
(cstrncmp): Charsets.
(__gg__init_program_state): Charsets.
(edited_to_binary): Charsets.
(var_is_refmod): Comment.
(__gg__power_of_ten): Reworked data initialization.
(__gg__scale_by_power_of_ten_1): Likewise.
(__gg__scale_by_power_of_ten_2): Likewise.
(value_is_too_big): Likewise.
(binary_to_big_endian): Likewise.
(binary_to_little_endian): Likewise.
(int128_to_int128_rounded): Likewise.
(get_binary_value_local): Likewise.
(get_init_value): Likewise.
(f128_to_i128_rounded): Likewise.
(__gg__initialization_values): Likewise.
(int128_to_field): Likewise.
(__gg__get_date_yymmdd): Charsets.
(__gg__field_from_string): Charsets.
(field_from_ascii): Charsets.
(__gg__get_date_yyyymmdd): Charsets.
(__gg__get_date_yyddd): Charsets.
(__gg__get_yyyyddd): Charsets.
(__gg__get_date_dow): Charsets.
(__gg__get_date_hhmmssff): Charsets.
(collation_position): Charsets.
(uber_compare): Charsets.
(__gg__dirty_to_binary): Charsets.
(__gg__dirty_to_float): Charsets.
(format_for_display_internal): Charsets.
(compare_88): Charsets.
(get_float128): Reworked.
(compare_field_class): Charsets.
(interconvert): Charsets.
(compare_strings): Charsets.
(__gg__compare_2): Charsets.
(compare_two_records): Charsets.
(__gg__sort_table): Charsets.
(init_var_both): Charsets.
(__gg__initialize_variable_clean): Charsets.
(alpha_to_alpha_move_from_location): Charsets.
(__gg__memdup): New function.
(alpha_to_alpha_move): Charsets.
(__gg__sort_workfile): Charsets.
(__gg__merge_files): Charsets.
(funky_find_wide): Charsets.
(funky_find_wide_backward): Charsets.
(normalize_id): Charsets.
(match_lengths): Charsets.
(the_alpha_and_omega): Charsets.
(the_alpha_and_omega_backward): Charsets.
(inspect_backward_format_1): Charsets.
(__gg__inspect_format_1): Charsets.
(inspect_backward_format_2): Charsets.
(__gg__inspect_format_2): Charsets.
(normalize_for_inspect_format_4): Charsets.
(__gg__inspect_format_4): Charsets.
(move_string): Charsets.
(brute_force_trim): Charsets.
(__gg__string): Charsets.
(display_both): Charsets.
(__gg__display_string): Charsets.
(__gg__bitwise_op): Charsets.
(is_numeric_display_numeric): Charsets.
(is_alpha_a_number): Charsets.
(classify_numeric_type): Charsets.
(classify_alphabetic_type): Charsets.
(__gg__classify): Charsets.
(__gg__convert_encoding): Charsets.
(accept_envar): Charsets.
(__gg__accept_envar): Charsets.
(__gg__get_argc): Charsets.
(__gg__get_argv): Charsets.
(__gg__get_command_line): Charsets.
(__gg__parser_set_conditional): Charsets.
(__gg__literaln_alpha_compare): Charsets.
(string_in): Charsets.
(__gg__unstring): Charsets.
(__gg__integer_from_float128): Charsets.
(__gg__adjust_dest_size): Charsets.
(__gg__just_mangle_name): Charsets.
(__gg__function_handle_from_name): Charsets.
(get_the_byte): Charsets.
(__gg__refer_from_string): Charsets.
(__gg__refer_from_psz): Charsets.
(__gg__find_string): Charsets.
(convert_for_convert): Charsets.
(__gg__convert): Charsets.
* libgcobol.h (__gg__compare_2): Charsets.
(__gg__field_from_string): Charsets.
(__gg__memdup): Charsets.
* posix/bin/Makefile: Posix bindings.
* posix/bin/scrape.awk: Posix bindings.
* posix/bin/udf-gen: Posix bindings.
* posix/udf/posix-lseek.cbl: Posix bindings.
* posix/udf/posix-unlink.cbl: Posix bindings.
* stringbin.cc (__gg__binary_to_string_encoded): Charsets.
(__gg__numeric_display_to_binary): Charsets.
* stringbin.h (__gg__binary_to_string_encoded): Charsets.
* valconv.cc (__gg__string_to_numeric_edited): Charsets.
* posix/cpy/psx-lseek.cpy: New file.
* posix/shim/lseek.cc: New file.

gcc/testsuite/ChangeLog:

* cobol.dg/group2/CHAR_and_ORD_with_COLLATING_sequence_-_EBCDIC.cob:
Change diagnostics message.
* cobol.dg/group2/Multi-target_MOVE_with_subscript_re-evaluation.cob:
Change diagnostics message.
* cobol.dg/group2/floating-point_SUBTRACT_FORMAT_2.out:
Change diagnostics message.
* cobol.dg/group2/floating-point_literals.out:
Change diagnostics message.

48 files changed:
gcc/cobol/cbldiag.h
gcc/cobol/cdf.y
gcc/cobol/cobol1.cc
gcc/cobol/gcobc
gcc/cobol/gcobol.1
gcc/cobol/genapi.cc
gcc/cobol/genapi.h
gcc/cobol/genmath.cc
gcc/cobol/genutil.cc
gcc/cobol/lang-specs.h
gcc/cobol/lang.opt
gcc/cobol/lexio.cc
gcc/cobol/messages.cc
gcc/cobol/parse.y
gcc/cobol/parse_ante.h
gcc/cobol/scan.l
gcc/cobol/scan_ante.h
gcc/cobol/show_parse.h
gcc/cobol/structs.cc
gcc/cobol/symbols.cc
gcc/cobol/symbols.h
gcc/cobol/token_names.h
gcc/cobol/util.cc
gcc/testsuite/cobol.dg/group2/CHAR_and_ORD_with_COLLATING_sequence_-_EBCDIC.cob
gcc/testsuite/cobol.dg/group2/Multi-target_MOVE_with_subscript_re-evaluation.cob
gcc/testsuite/cobol.dg/group2/floating-point_SUBTRACT_FORMAT_2.out
gcc/testsuite/cobol.dg/group2/floating-point_literals.out
libgcobol/charmaps.cc
libgcobol/charmaps.h
libgcobol/common-defs.h
libgcobol/encodings.h
libgcobol/gcobolio.h
libgcobol/gfileio.cc
libgcobol/gfileio.h
libgcobol/gmath.cc
libgcobol/intrinsic.cc
libgcobol/libgcobol.cc
libgcobol/libgcobol.h
libgcobol/posix/bin/Makefile
libgcobol/posix/bin/scrape.awk
libgcobol/posix/bin/udf-gen
libgcobol/posix/cpy/psx-lseek.cpy [new file with mode: 0644]
libgcobol/posix/shim/lseek.cc [new file with mode: 0644]
libgcobol/posix/udf/posix-lseek.cbl
libgcobol/posix/udf/posix-unlink.cbl
libgcobol/stringbin.cc
libgcobol/stringbin.h
libgcobol/valconv.cc

index 388bc781093afcaa155704b9272578e435566815..9167c563821d3b6fc79d956c0c262d7d840ad9af 100644 (file)
@@ -83,6 +83,12 @@ struct cbl_loc_t {
   int last_line;
   int last_column;
 
+  cbl_loc_t() 
+    : first_line(0)
+    , first_column(0)
+    , last_line(0)
+    , last_column(0)
+  {}
   cbl_loc_t( const YYLTYPE& loc ) 
     : first_line(loc.first_line)
     , first_column(loc.first_column)
@@ -90,7 +96,7 @@ struct cbl_loc_t {
     , last_column(loc.last_column)
   {}
 
-  operator YYLTYPE() const {
+  operator YYLTYPE() const {  //  cppcheck-suppress syntaxError
     return { first_line, first_column, last_line, last_column };
   } 
 };
@@ -137,11 +143,14 @@ enum cbl_diag_id_t : uint64_t {
 
   MfBinaryLongLong,
   MfCallGiving,
+  MfCallLiteral,
   MfCdfDollar, 
   MfComp6,
   MfCompX,
   MfLevel_1_Occurs, 
   MfLevel78,
+  MfAnyLength, 
+  MfMoveIndex, 
   MfMovePointer, 
   MfReturningNum,
   MfUsageTypename,
index 72e46b3f86e802ee9f93583429231d867cdf9d01..11dfcf5b6dbd2b82d19a6bda671c0259449c6dcf 100644 (file)
 %{
 
 #include "cobol-system.h"
-#include "coretypes.h"
-#include "tree.h"
+#include <coretypes.h>
+#include <tree.h>
+
 #undef yy_flex_debug
+
 #include "../../libgcobol/ec.h"
 #include "../../libgcobol/common-defs.h"
 #include "util.h"
index bda0aa846c4cf7cefac3d606eb15956e17211cdb..19ef652a3f03e2f4d1fc0f23156d481e5662da29 100644 (file)
@@ -329,6 +329,7 @@ enable_exceptions( bool enable ) {
 }
 
 void cobol_warning( cbl_diag_id_t id, int yn, bool );
+void cobol_warning_suppress( cbl_dialect_t dialect );
 
 static bool
 cobol_langhook_handle_option (size_t scode,
@@ -360,6 +361,18 @@ cobol_langhook_handle_option (size_t scode,
             copybook_extension_add(cobol_copyext);
             return true;
 
+        case OPT_fexec_charset_:
+            if( ! cobol_alpha_encoding( arg ) ) {
+              cbl_errx( "no such charset %qs", arg);
+            }
+            return true;
+
+        case OPT_fexec_national_charset_:
+          if( ! cobol_national_encoding( arg ) ) {
+              cbl_errx( "no such national charset %qs", arg);
+            }
+            return true;
+
         case OPT_M:
             cobol_set_pp_option('M');
             return true;
@@ -369,7 +382,7 @@ cobol_langhook_handle_option (size_t scode,
             return true;
 
         case OPT_fdefaultbyte:
-            // cobol_default_byte is an unsigned ing
+            // cobol_default_byte is an unsigned int
             wsclear(cobol_default_byte);
             return true;
 
@@ -410,6 +423,7 @@ cobol_langhook_handle_option (size_t scode,
             // gcc disallows 0 as an enumerated value, so we used 0x10 for iso.
             if( cobol_dialect == 0x100 ) cobol_dialect = 0; 
             cobol_dialect_set(cbl_dialect_t(cobol_dialect));
+            cobol_warning_suppress(cbl_dialect_t(cobol_dialect));
             return true;
 
         case OPT_fsyntax_only:
@@ -441,10 +455,6 @@ cobol_langhook_handle_option (size_t scode,
         case OPT_nomain:
             return true;
 
-        case OPT_finternal_ebcdic:
-            cobol_gcobol_feature_set(feature_internal_ebcdic_e);
-            return true;
-
         // Warnings and errors
 
         case OPT_Wbinary_long_long:
@@ -455,6 +465,10 @@ cobol_langhook_handle_option (size_t scode,
           cobol_warning(MfCallGiving, call_giving, warning_as_error);
           return true;
 
+        case OPT_Wcall_literal:
+          cobol_warning(MfCallLiteral, call_literal, warning_as_error);
+          return true;
+
         case OPT_Wcdf_dollar:
           cobol_warning(MfCdfDollar, cdf_dollar, warning_as_error);
           return true;
@@ -479,6 +493,10 @@ cobol_langhook_handle_option (size_t scode,
           cobol_warning(Par78CdfDefinedW, level_78_defined, warning_as_error);
           return true;
 
+        case OPT_Wmove_index:
+          cobol_warning(MfMoveIndex, move_index, warning_as_error);
+          return true;
+
         case OPT_Wmove_pointer:
           cobol_warning(MfMovePointer, move_pointer, warning_as_error);
           return true;
@@ -487,6 +505,10 @@ cobol_langhook_handle_option (size_t scode,
           cobol_warning(MfLevel78, level_78, warning_as_error);
           return true;
 
+        case OPT_Wany_length:
+          cobol_warning(MfAnyLength, cobol_any_length, warning_as_error);
+          return true;
+
         case OPT_Wreturning_number:
           cobol_warning(MfReturningNum, returning_number, warning_as_error);
           return true;
index fa9f6095d327607dda6d6840f80236823f162255..f4c805fdcc8bc7ae694aa3b258ac7411bc9d9d2d 100755 (executable)
@@ -184,7 +184,6 @@ do
          -C) error "$opt $incomparable"
              ;;
         -d | -debug | --debug) opts="$opts -fcobol-exceptions=EC-ALL"
-                       warn "$opt implies -fstack-check:"
                        ;;
         # define for preprocessor, note: -D* is directly passed
         -D)
@@ -200,7 +199,7 @@ do
              opts="$opts $opt"
              ;;
          -ext)
-            pending_arg="$opt "
+            pending_arg="-copyext "
              ;;
          -ext=*) opts="$opts $(echo "$opt" | sed 's/-ext=/-copyext ./')"
                  ;;
@@ -230,7 +229,7 @@ do
         -fwrite-after) warn "$opt" ;;
         -fmfcomment) warn "$opt" ;;
         -facucomment) warn "$opt" ;;
-        -fno-trunc) no_warn "$opt" ;;
+        -ftrunc | -fno-trunc | -fnotrunc) no_warn "$opt" ;;
         -fsingle-quote) warn "$opt" ;;
         -foptional-file) warn "$opt" ;;
         -fstatic-call | -fno-static-call)
@@ -384,8 +383,9 @@ do
         -fformat=auto) ;; # gcobol and gnucobol default
 
         -fixed | --fixed | -fformat=fixed | -fformat=variable | -fformat=xcard)
-                    # note: variable + xcard are only _more similar_ to fixed than free,
-                    # (with changing right-column to 250/255, which isn't supported in gcobol, yet)
+                    # note: variable + xcard are only _more similar_
+                    # to fixed than free, (with changing right-column
+                    # to 250/255, which isn't supported in gcobol
                      opts="$opts -ffixed-form"
                      ;;
 
@@ -400,8 +400,11 @@ do
                ;;
         -i | --info) warn "$opt"
                      ;;
-
-        # -I
+        -I) pending_arg=$opt
+            ;;
+        -I*)
+            opts="$opts $opt"
+            ;;
         -fimplicit-init) warn "$opt"
                          ;;
         -j | -job)  warn "$opt"
@@ -411,7 +414,8 @@ do
         -K*) warn "$opt"
             ;;
         # -l
-        # -L
+        -L) pending_arg=$opt
+            ;;
         --list*) warn "$opt"
                  ;;
         -m) mode="-shared"
@@ -425,9 +429,6 @@ do
              opts="$opts $opt"
             ;;
 
-        # -O0, -Ox
-        -O | -O2 | -Os) warn "$opt"
-                        ;;
         -S) mode="$opt"
             ;;
         -save-temps=*) opt="$(echo "$opt" | sed -E 's/^.+=//')"
index 92b22265bb404212bf616945b092c032558925fb..432977cf5cfe1c95c7b5b99e54276141052deb0a 100644 (file)
 .Op Fl copyext Ar ext
 .Op Fl ffixed-form | Fl ffree-form
 .Op Fl findicator-column
-.Op Fl finternal-ebcdic
+.Op Fl fexec-charset= Ns Ar encoding
+.Op Fl fexec-national-charset= Ns Ar encoding
+.ig
+.Op Fl collseq Ar encoding Ns \/, Fl ncolseq Ar encoding
+..
 .Op Fl dialect Ar dialect-name
 .Op Fl include Ar filename
 .Op Fl preprocess Ar preprocess-filter
@@ -39,6 +43,7 @@
 .Op Fl Wno-bad-numeric
 .Op Fl Wno-binary-long-long
 .Op Fl Wno-call-giving
+.Op Fl Wno-call-literal
 .Op Fl Wno-cdf-dollar
 .Op Fl Wno-cdf-invalid-parameter
 .Op Fl Wno-cdf-name-not-found
@@ -61,6 +66,7 @@
 .Op Fl Wno-literal-concat
 .Op Fl Wno-locale-error
 .Op Fl Wno-move-corresponding
+.Op Fl Wno-move-index
 .Op Fl Wno-move-pointer
 .Op Fl Wno-nllanginfo-error
 .Op Fl Wno-operator-space
@@ -325,6 +331,35 @@ is a non-constant data item, it is always resolved using dynamic
 linking, with
 .Xr dlsym 3 Ns Li ,
 because its value is determined at run time.
+.
+.It Fl fexec-charset= Ns Ar encoding
+Set the default execution character set for alphanumeric data items
+and literals in the absence of
+.Sy "CHARACTER CLASSIFICATION"
+in
+.Sy "CONFIGURATION SECTION" .
+.Ar encoding
+is an encoding name as defined by
+.Xr iconv 3 .
+Unless otherwise specified, the runtime encoding for both alphanumeric
+and NATIONAL is
+.Sy CP1252
+as defined by
+.Xr iconv 3 .
+.
+.It Fl fexec-national-charset= Ns Ar encoding
+Set the default execution character set for NATIONAL data items
+and literals in the absence of
+.Sy "CHARACTER CLASSIFICATION"
+in
+.Sy "CONFIGURATION SECTION" .
+.Ar encoding
+is an encoding name as defined by
+.Xr iconv 3 .
+To use an EBCDIC encoding for data items, one might use
+.D1 Fl fexec-national-charset= Ns Li CP1140
+for example. 
+.
 .It Fl dialect Ar dialect-name
 By default,
 .Nm
@@ -476,6 +511,8 @@ Warn if malformed %<#line%> directive is encountered.
 Warn if BINARY-LONG-LONG is used.
 .It Fl Wno-call_giving
 Warn if CALL ... GIVING is used.
+.It Fl Wno-call_literal
+Warn if CALL is used is used with a literal parameter by reference.
 .It Fl Wno-cdf-dollar
 Warn if CDF \[Do]IF is used.
 .It Fl Wno-comp-6
@@ -490,6 +527,8 @@ Warn if INSPECT ... TRAILING is used.
 Warn if Level 01 is used with OCCURS.
 .It Fl Wno-level-78-defined
 Warn if CDF defines Level 78 constant.
+.It Fl Wno-move-index
+Warn if MOVE INDEX is used.
 .It Fl Wno-move-pointer
 Warn if MOVE POINTER is used.
 .It Fl Wno-returning-number
@@ -633,7 +672,7 @@ The fourth links the three .o files into an
 .
 .Sh EBCDIC
 The
-.Fl finternal-ebcdic
+.Fl fexec-charset=cp1140
 option is useful when working with mainframe \*[lang] programs intended
 for EBCDIC-encoded files.  With this option, while the \*[lang] text
 remains in ASCII, the character literals and field initial values
@@ -641,10 +680,13 @@ produce EBCDIC strings in the compiled binary, and any character data
 read from a file are interpreted as EBCDIC data.  The file data are
 not
 .Em converted ;
-rather, the file is assumed to use EBCDIC representation. String
+rather, the file is assumed to use EBCDIC representation.  String
 literals in the \*[lang] text
 .Em are
-converted, so that they can be compared meaningfully with data in the file.
+converted, so that they can be compared meaningfully with data in the
+file.  Code Page 1140 is one of a number of EBCDIC code pages; it is
+often useful because it closely parallels the commonly-used Code Page
+1252 that provides many Western European characters.
 .Pp
 Only file data and character literals are affected.  Data read from
 and written to the environment, or taken from the command line, are
@@ -1387,7 +1429,8 @@ in the name, which is otherwise invalid in a \*[lang] identifier:
 .Bl -tag -compact
 .It Sy %EBCDIC-MODE
 is set by
-.Fl finternal-ebcdic .
+.Fl fexec-charset= Ns Ar <encoding>
+for all EBCDIC encodings.
 .It Sy %64-BIT-POINTER
 is implied by
 .Fl "dialect ibm" .
index ee325fca2c00230aa26132d8f1151ca0b2c8ca09..fac689e3f6798da9dcbfe47c72c6fcf5226bb6e9 100644 (file)
@@ -366,8 +366,7 @@ static
 char *
 level_88_helper(size_t parent_capacity,
                 const cbl_domain_elem_t &elem,
-                size_t &returned_size,
-                cbl_encoding_t encoding)
+                size_t &returned_size)
   {
   // We return a MALLOCed return value, which the caller must free.
   char *retval  = static_cast<char *>(xmalloc(parent_capacity + 64));
@@ -385,7 +384,7 @@ level_88_helper(size_t parent_capacity,
     switch(figconst)
       {
       case normal_value_e :
-        // This really should never happend
+        // This really should never happen
         abort();
         break;
       case low_value_e    :
@@ -420,21 +419,6 @@ level_88_helper(size_t parent_capacity,
     memcpy(first_name, elem.name(), first_name_length);
     first_name[first_name_length] = '\0';
 
-    /*  By rights, the parser should have given us this string in the target
-        encoding.  When I discovered that it was not, Jim Lowden was out of
-        town for a week, and I didn't feel like figuring out where in the
-        parser the fix should be.
-
-        So, I am doing the conversion here.  Eventually that will be fixed, and
-        chaos will reign here.  When that happens, remove the following
-        conversion. */
-    charmap_t *charmap = __gg__get_charmap(encoding);
-    for(size_t i=0; i<strlen(first_name); i++)
-      {
-      first_name[i] = charmap->mapped_character(first_name[i]);
-      }
-    ///////////////// end of conversion
-
     if( parent_capacity == 0 )
       {
       // Special case:  parent_capacity is zero when this routine has been
@@ -517,8 +501,7 @@ get_level_88_domain(size_t parent_capacity, cbl_field_t *var, size_t &returned_s
     // Do the first element of the domain
     stream = level_88_helper( parent_capacity,
                               domain->first,
-                              stream_len,
-                              var->codeset.encoding);
+                              stream_len);
     if( output_index + stream_len > retval_capacity )
       {
       retval_capacity *= 2;
@@ -533,8 +516,7 @@ get_level_88_domain(size_t parent_capacity, cbl_field_t *var, size_t &returned_s
     // Do the second element of the domain
     stream = level_88_helper( parent_capacity,
                               domain->last,
-                              stream_len,
-                              var->codeset.encoding);
+                              stream_len);
     if( output_index + stream_len > retval_capacity )
       {
       retval_capacity *= 2;
@@ -563,6 +545,8 @@ static
 char *
 get_class_condition_string(cbl_field_t *var)
   {
+  // This routine returns a malloced pointer.
+
   // We know at this point that var is FldClass
   // The LEVEL is not 88, so this is a CLASS SPECIAL-NAME
 
@@ -579,21 +563,17 @@ get_class_condition_string(cbl_field_t *var)
       "ABCJ12"        // This is the same as "A" "B" "C" ...
 
       Expressly presented numbers are the ordinal positions in the run-time
-      character set.  So, an ASCII "A" would be given as 66, which is one
-      greater than 65, which is the ASCII codepoint for "A".  An EBCDIC "A"
-      would be presented as 194, which is one greater than 193, which is the
-      decimal representation of an EBCDIC "A", whose hex code is 0xC2.
-
-      We need to account for EBCDIC as well as ASCII.  In EBCDIC,
-      "A" THROUGH "Z" doesn't mean what it looks like it means, because EBCIDC
-      encoding has gaps between I and J, and between R and S.  That isn't true
-      in ASCII.  We don't want to deal with these issues at compile time, so we
-      are encoding numeric ordinals with their negated values, while other
-      characters are given as the numeric forms of their ASCII encoding.
-      Conversion to EBCDIC occurs at runtime.
-
-      In support of this strategy, character strings like "ABCD" are broken up
-      into "A" "B" "C" "D" and converted to their hexadecimal representations.
+      character set. We encode those values with a leading ascii_hyphen to
+      distinguish them from characters.
+
+      Characters are converted to UTF32 values, and then encoded as big-endian
+      hexadecimal characters.
+
+      A range of values is encoded as a pair of hexadecimal values with an
+      ascii_slash between them.  The second value ends with a space
+
+      A list of characters is encoded simply as a stream of hexadecimal values
+      separated by spaces.
       */
 
   char ach[8192];
@@ -602,66 +582,66 @@ get_class_condition_string(cbl_field_t *var)
 
   while( domain->first.is_numeric || domain->first.name() )
     {
-    // *What* were they smoking back then?
-
-    uint8_t value1;
-    uint8_t value2;
-
     size_t first_name_length = domain->first.size()
                               ? domain->first.size()
                               : strlen(domain->first.name());
 
+    cbl_encoding_t from = var->codeset.default_encodings.source->type;
+    cbl_encoding_t to = DEFAULT_32_ENCODING;
+    size_t nbytes;
+    const char *converted;
+
     if( domain->first.is_numeric )
       {
       if( strlen(ach) > sizeof(ach) - 1000  )
         {
-        cbl_internal_error("Nice try, but you cannot fire me.");
+        cbl_internal_error("That string should not be that long.");
         }
 
-      // We are working with unquoted strings that contain the values 1 through
-      // 256:
-      value1 = (uint8_t)atoi(domain->first.name());
-      value2 = (uint8_t)atoi(domain->last.name());
+      // We are working with unquoted strings that contain the values
+      uint32_t value1 = atoll(domain->first.name());
+      uint32_t value2 = atoll(domain->last.name());
       if( value2 < value1 )
         {
         std::swap(value1, value2);
         }
       if( value1 != value2  )
         {
-        p += sprintf(p, "-%2.2X/-%2.2X ", value1-1, value2-1);
+        p += sprintf(p, "-%X/-%X ", value1, value2);
         }
       else
         {
-        p += sprintf(p, "-%2.2X ", value1-1);
+        p += sprintf(p, "-%X ", value1);
         }
       }
     else if( first_name_length == 1 )
       {
       // Since the first.name is a single character, we can do this as
       // a single-character pair.
-      uint8_t ch1;
-      uint8_t ch2;
-
-      ch2 = domain->last.name()[0];
-      ch1 = domain->first.name()[0];
-
-      if( ch1 < ch2 )
-        {
-        value1 = ch1;
-        value2 = ch2;
-        }
-      else
+      converted = __gg__iconverter(from,
+                                   to,
+                                   domain->first.name(),
+                                   1,
+                                   &nbytes);
+      cbl_char_t ch1 = *reinterpret_cast<const cbl_char_t *>(converted);
+      converted = __gg__iconverter(from,
+                                   to,
+                                   domain->last.name(),
+                                   1,
+                                   &nbytes);
+      cbl_char_t ch2 = *reinterpret_cast<const cbl_char_t *>(converted);
+
+      if( ch1 > ch2 )
         {
-        value2 = ch1;
-        value1 = ch2;
+        std::swap(ch1, ch2);
         }
-      if( value1 != value2  )
+      if( ch1 != ch2  )
         {
-        p += sprintf(p, "%2.2X/%2.2X ", value1, value2);
+        p += sprintf(p, "%X/%X ", ch1, ch2);
         }
       else
         {
-        p += sprintf(p, "%2.2X ", value1);
+        p += sprintf(p, "%X ", ch1);
         }
       }
     else
@@ -675,7 +655,13 @@ get_class_condition_string(cbl_field_t *var)
                                 // : strlen(domain->first.name());
       for(size_t i=0; i<first_name_length; i++)
         {
-        p += sprintf(p, "%2.2X ", (unsigned char)domain->first.name()[i]);
+        converted = __gg__iconverter(from,
+                                     to,
+                                     domain->first.name()+i,
+                                     1,
+                                     &nbytes);
+        cbl_char_t ch1 = *reinterpret_cast<const cbl_char_t *>(converted);
+        p += sprintf(p, "%X ", ch1);
         }
       }
     domain += 1;
@@ -863,9 +849,9 @@ function_pointer_from_name(const cbl_refer_t &name,
                        NULL); // And, hence, no types
 
     // Fetch the FUNCTION_DECL for that FUNCTION_TYPE
-    char *tname = static_cast<char *>(xmalloc(name.field->data.capacity+1));
-    memcpy(tname, name.field->data.original(), name.field->data.capacity);
-    tname[name.field->data.capacity] = '\0';
+    char *tname = static_cast<char *>(xmalloc(name.field->data.capacity()+1));
+    memcpy(tname, name.field->data.original(), name.field->data.capacity());
+    tname[name.field->data.capacity()] = '\0';
     tree function_decl = gg_build_fn_decl(tname,
                                           fndecl_type);
     free(tname);
@@ -1210,10 +1196,6 @@ initialize_variable_internal( cbl_refer_t refer,
                               bool explicitly=false,
                               bool just_once=false)
   {
-  // fprintf(stderr, "initialize_variable_internal for %s\n", refer.field->name);
-  // gg_printf("initialize_variable_internal for %s\n",
-            // gg_string_literal(refer.field->name),
-            // NULL_TREE);
   cbl_field_t *parsed_var = refer.field;
   if( !parsed_var )
     {
@@ -1245,7 +1227,7 @@ initialize_variable_internal( cbl_refer_t refer,
       }
     while(0);
     SHOW_PARSE_REF(" ", refer);
-    if( parsed_var->data.initial )
+    if( parsed_var->data.original() )
       {
       SHOW_PARSE_TEXT(" >>")
       if( parsed_var->level == 88)
@@ -1327,7 +1309,7 @@ initialize_variable_internal( cbl_refer_t refer,
           case FldNumericEdited:
           case FldAlphaEdited:
           case FldLiteralA:
-            SHOW_PARSE_TEXT(parsed_var->data.initial);
+            SHOW_PARSE_TEXT(parsed_var->data.original());
             break;
           default:
             {
@@ -1370,7 +1352,7 @@ initialize_variable_internal( cbl_refer_t refer,
     is_redefined = true;
     }
 
-  if( parsed_var->data.initial )
+  if( parsed_var->data.original() )
     {
     bool a_parent_initialized = false;
     const cbl_field_t *parent = parent_of(parsed_var);
@@ -1407,7 +1389,6 @@ initialize_variable_internal( cbl_refer_t refer,
   flag_bits     |= just_once ? JUST_ONCE_BIT : 0 ;
 
   suppress_dest_depends = false;  // Set this to false so that refer_is_clean is valid
-  //fprintf(stderr, "refer_is_clean %2.2d %s %d 0x%lx\n", refer.field->level, refer.field->name, refer_is_clean(refer), refer.field->attr);
 
   if( !refer_is_clean(refer) )
     {
@@ -1524,7 +1505,7 @@ initialize_variable_internal( cbl_refer_t refer,
          || parsed_var->type == FldLiteralA )
         {
         gg_assign(spaces, integer_one_node);
-        tree counter = gg_define_int(parsed_var->data.capacity);
+        tree counter = gg_define_int(parsed_var->data.capacity());
         WHILE(counter, gt_op, integer_zero_node)
           {
           gg_decrement(counter);
@@ -1556,17 +1537,6 @@ initialize_variable_internal( cbl_refer_t refer,
   suppress_dest_depends = false;
   }
 
-//static void
-//initialize_variable_internal( cbl_field_t *field,
-//                              bool explicitly=false,
-//                              bool just_once=false)
-//  {
-//  cbl_refer_t wrapper(field);
-//  initialize_variable_internal( wrapper,
-//                                explicitly,
-//                                just_once);
-//  }
-
 void
 parser_initialize(const cbl_refer_t& refer, bool like_parser_symbol_add)
   {
@@ -1577,7 +1547,7 @@ parser_initialize(const cbl_refer_t& refer, bool like_parser_symbol_add)
     }
   else
     {
-    gcc_assert(refer.field->data.initial);
+    gcc_assert(refer.field->data.original());
     static const bool explicitly = true;
     initialize_variable_internal(refer, explicitly);
     }
@@ -1592,7 +1562,7 @@ get_binary_value_from_float(tree         value,
   {
   // The destination is something with rdigits; the source is FldFloat
   tree ftype;
-  switch( source->data.capacity )
+  switch( source->data.capacity() )
     {
     case 4:
       ftype = FLOAT;
@@ -1722,7 +1692,7 @@ get_bytes_needed(cbl_field_t *field)
     case FldPointer:
     case FldFloat:
     case FldLiteralN:
-      retval = field->data.capacity;
+      retval = field->data.capacity();
       break;
 
     case FldNumericDisplay:
@@ -1778,7 +1748,7 @@ get_bytes_needed(cbl_field_t *field)
         }
       else
         {
-        retval = field->data.capacity;
+        retval = field->data.capacity();
         }
       break;
       }
@@ -2195,7 +2165,7 @@ cobol_compare(  tree return_int,
           // gg_string_literal(left_side_ref.field->name),
           // gg_string_literal(right_side_ref.field->name),
           // member(left_side_ref.field, "data"),
-          // gg_string_literal(right_side_ref.field->data.initial),
+          // gg_string_literal(right_side_ref.field->data.original()),
           // NULL_TREE);
 
   CHECK_FIELD(left_side_ref.field);
@@ -2246,28 +2216,25 @@ cobol_compare(  tree return_int,
 
             case FldGroup:
             case FldAlphanumeric:
-            case FldLiteralA:
               {
-              // Comparing a FldLiteralN to an alphanumeric
-
-              // This next conversion may be overkill.  But just in case
-              // the encodings of the two variables are different, we are
-              // going to convert left-side text to the right-side encoding
-              cbl_encoding_t enc_left  = lefty->field->codeset.encoding;
-              cbl_encoding_t enc_right = righty->field->codeset.encoding;
+              // Comparing a FldLiteralN to an alphanumeric.  The alphanumeric
+              // is encoded in its codeset.encoding, but the FldLiteralN is,
+              // in accordance with the rules in cbl_field_t::internalize,
+              // encoded in the source-code encoding.  The routine we are about
+              // to call assumes that the literal string is encoded the same
+              // as the alphanumeric, so we have to make it match.
               size_t outlength;
-              size_t inlength = strlen(lefty->field->data.initial);
+              cbl_encoding_t enc_right = righty->field->codeset.encoding;
               char *converted = __gg__iconverter(
-                                         enc_left,
+                                         DEFAULT_SOURCE_ENCODING,
                                          enc_right,
-                                         lefty->field->data.initial,
-                                         inlength,
+                                         lefty->field->data.original(),
+                                         strlen(lefty->field->data.original()),
                                          &outlength );
               gg_assign(  return_int, gg_call_expr(
                           INT,
                           "__gg__literaln_alpha_compare",
-                    build_string_literal(strlen(lefty->field->data.initial)+1,
-                                         converted),
+                          gg_string_literal(converted),
                           gg_get_address_of(righty->field->var_decl_node),
                           refer_offset(*righty),
                           refer_size_source(  *righty),
@@ -2278,6 +2245,20 @@ cobol_compare(  tree return_int,
               break;
               }
 
+            case FldLiteralA:
+              {
+              // Comparing a FldLiteralN to an FldLiteralA.
+              // lefty->field->data.original() is the numeric string in ASCII.
+              // righty->field->data.original() is original alphanumeric
+              // string in ASCII.
+              int icmp = strcmp(lefty->field->data.original(),
+                                righty->field->data.original());
+              gg_assign(return_int, build_int_cst_type(INT, icmp));
+              compared = true;
+              break;
+              }
+
+
             default:
               break;
             }
@@ -2338,6 +2319,7 @@ cobol_compare(  tree return_int,
     int rightflags =   (right_side_ref.all         ? REFER_T_MOVE_ALL   : 0)
                     +  (right_side_ref.addr_of     ? REFER_T_ADDRESS_OF : 0)
                     +  (right_side_ref.refmod.from ? REFER_T_REFMOD     : 0);
+
     gg_assign(  return_int, gg_call_expr(
                 INT,
                 "__gg__compare",
@@ -2355,218 +2337,6 @@ cobol_compare(  tree return_int,
     }
   }
 
-static void
-move_tree(  cbl_field_t  *dest,
-            tree          offset,
-            tree          psz_source, // psz_source is a null-terminated string
-            tree length_bump=integer_zero_node)
-  {
-  // This routine assumes that the psz_source is in the same codeset as the
-  // dest.
-
-  Analyze();
-  SHOW_PARSE
-    {
-    SHOW_PARSE_HEADER
-    SHOW_PARSE_FIELD(" ", dest);
-    SHOW_PARSE_END
-    }
-
-  CHECK_FIELD(dest);
-
-  bool moved = true;
-
-  tree source_length = gg_define_size_t();
-  gg_assign(source_length, gg_strlen(psz_source));
-  gg_assign(source_length, gg_add(source_length, gg_cast(SIZE_T, length_bump)));
-
-  tree min_length = gg_define_size_t();
-
-  tree location = gg_define_uchar_star();
-  tree length   = gg_define_size_t();
-
-  gg_assign(location,
-            gg_add(member(dest->var_decl_node, "data"),
-                   offset));
-  gg_assign(length,
-            member(dest->var_decl_node, "capacity"));
-
-  IF(source_length, lt_op, length)
-    {
-    gg_assign(min_length, source_length);
-    }
-  ELSE
-    {
-    gg_assign(min_length, length);
-    }
-  ENDIF
-
-  tree value;
-  tree rdigits;
-
-  switch( dest->type )
-    {
-    case FldGroup:
-    case FldAlphanumeric:
-      {
-      // Space out the alphanumeric destination:
-      charmap_t *charmap = __gg__get_charmap(dest->codeset.encoding);
-
-      gg_memset(  location,
-                  build_int_cst_type(INT,
-                                     charmap->mapped_character(ascii_space)),
-                  length );
-      // Copy the alphanumeric result over.
-      gg_memcpy(  location,
-                  psz_source,
-                  min_length );
-      break;
-      }
-
-    case FldNumericDisplay:
-    case FldNumericEdited:
-    case FldNumericBinary:
-    case FldNumericBin5:
-    case FldPacked:
-    case FldIndex:
-      {
-      value   = gg_define_int128();
-      rdigits = gg_define_int();
-
-      gg_assign(value,
-                gg_call_expr( INT128,
-                              "__gg__dirty_to_binary",
-                              psz_source,
-                              build_int_cst_type(INT, dest->codeset.encoding),
-                              source_length,
-                              gg_get_address_of(rdigits),
-                              NULL_TREE));
-
-      gg_call(VOID,
-              "__gg__int128_to_qualified_field",
-              gg_get_address_of(dest->var_decl_node),
-              offset,
-              build_int_cst_type(SIZE_T, dest->data.capacity),
-              value,
-              rdigits,
-              build_int_cst_type(INT, truncation_e),
-              null_pointer_node,
-              NULL_TREE);
-      }
-    break;
-
-    case FldAlphaEdited:
-      {
-      gg_call(VOID,
-              "__gg__string_to_alpha_edited",
-              location,
-              build_int_cst_type(INT, DEFAULT_SOURCE_ENCODING),
-              psz_source,
-              min_length,
-              member(dest->var_decl_node, "picture"),
-              NULL_TREE);
-      break;
-      }
-
-    default:
-      moved = false;
-      break;
-    }
-
-  TRACE1
-    {
-    TRACE1_HEADER
-    gg_fprintf(trace_handle, 1, "source: \"%s\"", psz_source);
-    TRACE1_END
-    TRACE1_INDENT
-    TRACE1_FIELD(               "dest  : ", dest, "")
-    TRACE1_END
-    }
-
-  if( !moved )
-    {
-    dbgmsg("%10s in %s:%d", __func__, __FILE__, __LINE__ );
-    cbl_internal_error( "I don%'t know how to MOVE an alphabetical string to %s(%s)",
-                        cbl_field_type_str(dest->type),
-                        dest->name
-         );
-    return;
-    }
-  }
-
-static void
-move_tree_to_field(cbl_field_t *field, tree psz)
-  {
-  // psz has to be in the same encoding as field
-  move_tree(field, integer_zero_node, psz);
-  }
-
-static tree
-get_string_from(cbl_field_t *field)
-  {
-  // This returns a malloced copy of either a literal string or a
-  // an alphanumeric field.  The idea is that eventually free() will be
-  // called in the runtime space:
-
-  tree psz = gg_define_char_star();
-
-  if( field )
-    {
-    switch( field->type )
-      {
-      case FldLiteralA:
-        {
-        gg_assign(psz,
-                  gg_cast(CHAR_P,
-                          gg_malloc(build_int_cst_type(SIZE_T,
-                                                     field->data.capacity+1))));
-        const char *litstring = get_literal_string(field);
-        gg_memcpy(psz,
-                  gg_string_literal(litstring),
-                  build_int_cst_type(SIZE_T, field->data.capacity+1));
-        break;
-        }
-
-      case FldGroup:
-      case FldAlphanumeric:
-        // make a copy of .data:
-        gg_assign(psz,
-                  gg_cast(CHAR_P,
-                          gg_malloc(build_int_cst_type(SIZE_T,
-                                                    field->data.capacity+1))));
-        gg_memcpy(  psz,
-                    member(field, "data"),
-                    member(field, "capacity"));
-        // null-terminate it:
-        gg_assign(  gg_array_value(psz, member(field, "capacity")),
-                    char_nodes[0]);
-        break;
-
-      case FldForward:
-        {
-        // At the present time, we are assuming this happens when somebody
-        // specifies an unquoted file name in an ASSIGN statement:
-        //    SELECT file3 ASSIGN DISK.
-        //
-        // In that case, we just return DISK, which is field->name:
-        psz = gg_strdup(gg_string_literal(field->name));
-        break;
-        }
-
-      default:
-        cbl_internal_error(
-                "%s: %<field->type%> %s must be literal or alphanumeric",
-                __func__, cbl_field_type_str(field->type));
-      break;
-      }
-    }
-  else
-    {
-    gg_assign(psz, gg_cast(CHAR_P, null_pointer_node));
-    }
-  return psz;
-  }
-
 static char *
 combined_name(const cbl_label_t *label)
   {
@@ -4165,7 +3935,9 @@ parser_init_list()
   gg_call(VOID,
           "__gg__variables_to_init",
           gg_get_address_of(array),
-          wsclear() ? build_string_literal(1, (const char *)wsclear())
+          wsclear() ? build_string_literal(
+                                    1, 
+                                    reinterpret_cast<const char *>(wsclear()))
                     : null_pointer_node,
           NULL_TREE);
   }
@@ -4345,14 +4117,18 @@ psa_FldLiteralN(struct cbl_field_t *field )
   uint32_t digits;
   int32_t  rdigits;
   uint64_t attr;
-  FIXED_WIDE_INT(128) value = dirty_to_binary(field->data.original(),
+  //// DUBNERHACK.  Necessary to prevent UAT lockup:
+  const char *source_text = field->data.original()
+                          ? field->data.original()
+                          : field->data.initial;
+  FIXED_WIDE_INT(128) value = dirty_to_binary(source_text,
                                               capacity,
                                               digits,
                                               rdigits,
                                               attr);
   // This is a rare occurrence of a parser_xxx call changing the entry
   // in the symbol table.
-  field->data.capacity = capacity;
+  field->data.capacity(  capacity );
   field->data.digits   = digits;
   field->data.rdigits  = rdigits;
   field->attr         |= attr;
@@ -4370,7 +4146,7 @@ psa_FldLiteralN(struct cbl_field_t *field )
 
   // The value is 1, 2, 4, 8 or 16 bytes, so an ordinary constructor can be
   // used.
-  var_type = tree_type_from_size( field->data.capacity,
+  var_type = tree_type_from_size( field->data.capacity(),
                                   field->attr & signable_e);
   tree new_var_decl = gg_define_variable( var_type,
                                           base_name,
@@ -4914,16 +4690,10 @@ parser_accept_date_yymmdd( struct cbl_field_t *target )
 
   CHECK_FIELD(target);
 
-  tree pointer = gg_define_char_star();
-  gg_assign(pointer, gg_call_expr(CHAR_P,
-                                  "__gg__get_date_yymmdd",
-                                  gg_get_address_of(target->var_decl_node),
-                                  NULL_TREE));
-  move_tree_to_field( target,
-                      pointer);
-
-  gg_free(pointer);
-
+  gg_call(VOID,
+          "__gg__get_date_yymmdd",
+          gg_get_address_of(target->var_decl_node),
+          NULL_TREE);
   TRACE1
     {
     TRACE1_HEADER
@@ -4941,19 +4711,10 @@ parser_accept_date_yyyymmdd( struct cbl_field_t *target )
     SHOW_PARSE_HEADER
     SHOW_PARSE_END
     }
-
-  CHECK_FIELD(target);
-
-  tree pointer = gg_define_char_star();
-  gg_assign(pointer, gg_call_expr(CHAR_P,
-                                  "__gg__get_date_yyyymmdd",
-                                  gg_get_address_of(target->var_decl_node),
-                                  NULL_TREE));
-  move_tree_to_field( target,
-                      pointer);
-
-  gg_free(pointer);
-
+  gg_call(VOID,
+          "__gg__get_date_yyyymmdd",
+          gg_get_address_of(target->var_decl_node),
+          NULL_TREE);
   TRACE1
     {
     TRACE1_HEADER
@@ -4974,16 +4735,10 @@ parser_accept_date_yyddd( struct cbl_field_t *target )
 
   CHECK_FIELD(target);
 
-  tree pointer = gg_define_char_star();
-  gg_assign(pointer, gg_call_expr(CHAR_P,
-                                  "__gg__get_date_yyddd",
-                                  gg_get_address_of(target->var_decl_node),
-                                  NULL_TREE));
-  move_tree_to_field( target,
-                      pointer);
-
-  gg_free(pointer);
-
+  gg_call(VOID,
+          "__gg__get_date_yyddd",
+          gg_get_address_of(target->var_decl_node),
+          NULL_TREE);
   TRACE1
     {
     TRACE1_HEADER
@@ -5004,16 +4759,10 @@ parser_accept_date_yyyyddd( struct cbl_field_t *target )
 
   CHECK_FIELD(target);
 
-  tree pointer = gg_define_char_star();
-  gg_assign(pointer, gg_call_expr(CHAR_P,
-                                  "__gg__get_yyyyddd",
-                                  gg_get_address_of(target->var_decl_node),
-                                  NULL_TREE));
-  move_tree_to_field( target,
-                      pointer);
-
-  gg_free(pointer);
-
+  gg_call(VOID,
+          "__gg__get_yyyyddd",
+          gg_get_address_of(target->var_decl_node),
+          NULL_TREE);
   TRACE1
     {
     TRACE1_HEADER
@@ -5034,16 +4783,10 @@ parser_accept_date_dow( struct cbl_field_t *target )
 
   CHECK_FIELD(target);
 
-  tree pointer = gg_define_char_star();
-  gg_assign(pointer, gg_call_expr(CHAR_P,
-                                  "__gg__get_date_dow",
-                                  gg_get_address_of(target->var_decl_node),
-                                  NULL_TREE));
-  move_tree_to_field( target,
-                      pointer);
-
-  gg_free(pointer);
-
+  gg_call(VOID,
+          "__gg__get_date_dow",
+          gg_get_address_of(target->var_decl_node),
+          NULL_TREE);
   TRACE1
     {
     TRACE1_HEADER
@@ -5064,16 +4807,10 @@ parser_accept_date_hhmmssff( struct cbl_field_t *target )
 
   CHECK_FIELD(target);
 
-  tree pointer = gg_define_char_star();
-  gg_assign(pointer, gg_call_expr(CHAR_P,
-                                  "__gg__get_date_hhmmssff",
-                                  gg_get_address_of(target->var_decl_node),
-                                  NULL_TREE));
-  move_tree_to_field( target,
-                      pointer);
-
-  gg_free(pointer);
-
+  gg_call(VOID,
+          "__gg__get_date_hhmmssff",
+          gg_get_address_of(target->var_decl_node),
+          NULL_TREE);
   TRACE1
     {
     TRACE1_HEADER
@@ -5144,8 +4881,7 @@ parser_alphabet( const cbl_alphabet_t& alphabet )
 
     case custom_encoding_e:
       {
-#pragma message "Verify program-id is disambiguated"
-      size_t alphabet_index = symbol_unique_index(symbol_elem_of(&alphabet));
+      uint64_t alphabet_index = symbol_unique_index(symbol_elem_of(&alphabet));
 
       unsigned char ach[256];
 
@@ -5224,7 +4960,7 @@ parser_alphabet_use( cbl_alphabet_t& alphabet )
     SHOW_PARSE_END
     }
 
-  size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet));
+  uint64_t alphabet_index = symbol_unique_index(symbol_elem_of(&alphabet));
 
   switch(alphabet.encoding)
     {
@@ -5316,18 +5052,6 @@ parser_display_internal(tree file_descriptor,
       ENDIF
       }
     }
-  else if( refer.field->type == FldLiteralA )
-    {
-    gg_call(VOID,
-            "__gg__display_string",
-            file_descriptor,
-            build_int_cst_type(INT, refer.field->codeset.encoding),
-            build_string_literal(refer.field->data.capacity,
-                                 refer.field->data.initial),
-            build_int_cst_type(SIZE_T, refer.field->data.capacity),
-            advance ? integer_one_node : integer_zero_node,
-            NULL_TREE );
-    }
   else if( refer.field->type == FldLiteralN )
     {
     // The parser found the string of digits from the source code and converted
@@ -5421,6 +5145,34 @@ parser_display_internal(tree file_descriptor,
                  integer_one_node);
       }
     }
+  else if(    refer.field->type == FldFloat
+           && refer.field->attr & constant_e
+           && !(refer.field->attr & intermediate_e) )
+    {
+    // We are going to output what we think the user typed in the first place
+    char * const to_print = xstrdup(refer.field->data.original());
+    char *p = to_print;
+    if( *p == ascii_plus )
+      {
+      p += 1;
+      }
+    size_t len = strlen(p);
+    if(len > 2 && p[len-2] == ascii_E && p[len-1] == ascii_0 )
+      {
+      len -= 2;
+      }
+    gg_write(  file_descriptor,
+               build_string_literal(len, p),
+               build_int_cst_type(SIZE_T, len));
+    free(to_print);
+
+    if( advance )
+      {
+      gg_write(  file_descriptor,
+                 gg_string_literal("\n"),
+                 integer_one_node);
+      }
+    }
   else
     {
     if( refer_is_clean(refer) )
@@ -5751,7 +5503,7 @@ get_literalN_value(cbl_field_t *var)
   {
   // Get the literal N value from the integer var_decl
   tree retval = NULL_TREE;
-  tree var_type = tree_type_from_size(var->data.capacity,
+  tree var_type = tree_type_from_size(var->data.capacity(),
                                       var->attr & signable_e);
   retval = gg_cast(var_type, var->data_decl_node);
   return retval;
@@ -6400,7 +6152,7 @@ tree_type_from_field_type(cbl_field_t *field, size_t &nbytes)
       case FldAlphaEdited:
       case FldNumericEdited:
         retval = CHAR_P;
-        nbytes = field->data.capacity;
+        nbytes = field->data.capacity();
         break;
 
       case FldNumericDisplay:
@@ -6421,7 +6173,7 @@ tree_type_from_field_type(cbl_field_t *field, size_t &nbytes)
       case FldNumericBin5:
       case FldIndex:
       case FldPointer:
-        if( field->data.capacity > 8 )
+        if( field->data.capacity() > 8 )
           {
           retval = UINT128;
           nbytes = 16;
@@ -6434,12 +6186,12 @@ tree_type_from_field_type(cbl_field_t *field, size_t &nbytes)
         break;
 
       case FldFloat:
-        if( field->data.capacity == 8 )
+        if( field->data.capacity() == 8 )
           {
           retval = DOUBLE;
           nbytes = 8;
           }
-        else if( field->data.capacity == 4 )
+        else if( field->data.capacity() == 4 )
           {
           retval = FLOAT;
           nbytes = 4;
@@ -6595,7 +6347,7 @@ program_end_stuff(cbl_refer_t refer, ec_type_t ec)
         gg_memcpy(gg_get_address_of(retval),
                   member(returner, "data"),
                   build_int_cst_type( SIZE_T,
-                                      std::min(nbytes, (size_t)returner->data.capacity)));
+                                      std::min(nbytes, (size_t)returner->data.capacity())));
         }
       else
         {
@@ -6622,7 +6374,7 @@ program_end_stuff(cbl_refer_t refer, ec_type_t ec)
       // error of returning a pointer to data on the stack.
 
       tree array_type = build_array_type_nelts(UCHAR,
-                                    returner->data.capacity);
+                                    returner->data.capacity());
       tree array     =  gg_define_variable(array_type, vs_static);
       gg_memcpy(gg_get_address_of(array),
                 member(returner->var_decl_node, "data"),
@@ -6778,7 +6530,7 @@ walk_initialization(cbl_field_t *field, bool initialized, bool deallocate)
             {
             gg_memset(member(this_one->var_decl_node, "data"),
                    integer_zero_node,
-                   build_int_cst_type(SIZE_T, this_one->data.capacity));
+                   build_int_cst_type(SIZE_T, this_one->data.capacity()));
             }
           }
         }
@@ -6941,11 +6693,9 @@ parser_xml_parse( cbl_label_t *instance,
           current_function->our_name,
           instance_counter++);
 
-  cbl_field_t for_entry = {};
-  for_entry.type = FldAlphanumeric;
-  for_entry.data.capacity = strlen(ach);
-  for_entry.data.initial = ach;
-  for_entry.codeset.encoding = iconv_CP1252_e;
+  cbl_field_data_t data( 0, strlen(ach), 0,0, ach );
+  cbl_field_t for_entry(FldAlphanumeric, 0, data, 0);
+  for_entry.codeset.set(iconv_CP1252_e);
 
   // build an island for the callback:
   tree island_goto;
@@ -7211,6 +6961,20 @@ initialize_the_data()
             "__gg__decimal_point_is_comma",
             NULL_TREE);
     }
+
+  // This is where we tell the library about this program's initialization
+  // values:
+  cbl_field_t *init_working = current_options().initial_working();
+  cbl_field_t *init_local   = current_options().initial_local();
+  gg_call(VOID,
+          "__gg__initialization_values",
+          build_int_cst_type(UINT, wsclear() ? *wsclear()
+                                    : static_cast<uint32_t>(NOT_A_CHARACTER)),
+          init_working ? gg_get_address_of(init_working->var_decl_node)
+                       : null_pointer_node,
+          init_local   ? gg_get_address_of(init_local->var_decl_node)
+                       : null_pointer_node,
+          NULL_TREE);
   }
 
 static
@@ -7423,7 +7187,7 @@ establish_using(size_t nusing,
                   gg_get_address_of(base),
                   build_int_cst_type(SIZE_T, nbytes));
 
-        tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity);
+        tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity());
         tree data_decl_node = gg_define_variable( array_type,
                                                   NULL,
                                                   vs_static);
@@ -8309,8 +8073,6 @@ parser_setop( struct cbl_field_t *tgt,
     }
 
   gcc_assert(tgt->type == FldConditional);
-  gcc_assert(domain->data.initial);
-  gcc_assert(strlen(domain->data.initial));
 
   switch(op)
     {
@@ -8322,11 +8084,8 @@ parser_setop( struct cbl_field_t *tgt,
           gg_assign(tgt->var_decl_node, gg_build_relational_expression(
                       gg_call_expr(INT,
                                    "__gg__setop_compare",
-                                   member(candidate, "data"),
-                                   member(candidate, "capacity"),
+                                   gg_get_address_of(candidate->var_decl_node),
                                    member(domain, "initial"),
-                                   build_int_cst_type(INT,
-                                                     domain->codeset.encoding),
                                    NULL_TREE),
                       ne_op,
                       integer_zero_node));
@@ -9689,23 +9448,20 @@ parser_set_conditional88( const cbl_refer_t& refer, bool which_way )
     {
     // We are dealing with an ordinary string.
 
-    // When Jim gets around to converting the domain to the target encoding,
-    // this code will have to be removed
-#if 1
-    char *fname = xstrdup(src->first.name());
-    charmap_t *charmap = __gg__get_charmap(tgt->codeset.encoding);
-    for(size_t i=0; i<strlen(fname); i++)
-      {
-      fname[i] = charmap->mapped_character(fname[i]);
-      }
-    move_tree_to_field( parent,
-                        build_string_literal(strlen(fname)+1, fname));
-    free(fname);
-#else
-    move_tree_to_field( parent,
-                        build_string_literal(src->first.size()+1,
-                                             src->first.name()));
-#endif
+    size_t converted_bytes;
+    const char *converted =
+                  __gg__iconverter(parent->codeset.default_encodings.source->type,
+                                   parent->codeset.encoding,
+                                   src->first.name(),
+                                   strlen(src->first.name())+1,
+                                   &converted_bytes);
+    gg_call(VOID,
+            "__gg__refer_from_string",
+            gg_get_address_of(parent->var_decl_node),
+            size_t_zero_node,
+            build_int_cst_type(SIZE_T, parent->data.capacity()),
+            build_string_literal(converted_bytes, converted),
+            NULL_TREE);
     }
   else
     {
@@ -9899,14 +9655,37 @@ parser_file_add(struct cbl_file_t *file)
           __func__);
     }
 
-#pragma message "Verify program-id is disambiguated"
-  size_t symbol_table_index = symbol_unique_index(symbol_elem_of(file));
+  // This code is a hack needed until the parser sets the varies.min/max
+  // properly when they are not equal:
+  if(    varies.min != varies.max
+      && current_encoding(display_encoding_e) == iconv_UTF16LE_e
+      && varies.max == symbol_file_record(file)->data.capacity() )
+    {
+    fprintf(stderr,
+        "There is a hack in genapi.cc to take into account a parser error,\n"
+        "namely the fact that when there is a RECORD VARYING clause, the\n"
+        "min/max values reflect the values in the source code, while when\n"
+        "there is no VARYING clause the min/max values are the same as the\n"
+        "default_record's data.capacity().  If you are seeing this message,\n"
+        "it would appear the parser has been updated to supply the stride-\n"
+        "corrected min/max, and the hack should be removed.\n");
+    gcc_assert(false);
+    }
+  if( varies.max < symbol_file_record(file)->data.capacity())
+    {
+    const charmap_t *charmap = 
+                     __gg__get_charmap(current_encoding(display_encoding_e));
+    varies.min *= charmap->stride();
+    varies.max *= charmap->stride();
+    }
+
+  uint64_t symbol_table_index = symbol_unique_index(symbol_elem_of(file));
 
   gg_call(VOID,
           "__gg__file_init",
           gg_get_address_of(new_var_decl),
           gg_string_literal(file->name),
-          build_int_cst_type(SIZE_T, symbol_table_index),
+          build_int_cst_type(ULONGLONG, symbol_table_index),
           array_of_keys,
           key_numbers,
           unique_flags,
@@ -9923,8 +9702,12 @@ parser_file_add(struct cbl_file_t *file)
           build_int_cst_type(INT, (int)file->optional),
           build_int_cst_type(SIZE_T, varies.min),
           build_int_cst_type(SIZE_T, varies.max),
-/*  Right now, file->codeset.encoding is not being set properly.  Remove this
-    comment and fix the following code when that's repaired.  */
+/*  Right now, file->codeset.encoding is not being set properly.  For example,
+    when the exec-charset is EBCDIC, file->codeset is coming through as CP1252.
+    However, when exec-charset is UTF32LE, file->codeset is arriving here as
+    UTF32LE.  Go figure.
+
+    Remove this comment and fix the following code when that's repaired.  */
 //          build_int_cst_type(INT, (int)file->codeset.encoding),
           build_int_cst_type(INT, current_encoding(display_encoding_e)),
           build_int_cst_type(INT, (int)file->codeset.alphabet),
@@ -9942,44 +9725,6 @@ parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char )
     }
   }
 
-static
-tree get_the_filename(bool &quoted_name, const cbl_file_t *file)
-  {
-  // The cbl_file_t has a cbl_field_t *filename.  This can be a FldAlphanumeric.
-  // The runtime has a (char *)filename, so we need to
-  // do a runtime conversion.
-
-  tree psz;   // This is going to be either the name of the file, or the
-  // possible run-time environment variable that will contain
-  // the name of the file.
-
-  cbl_field_t *field_of_name = symbol_field_forward(file->filename);
-  quoted_name = false;
-  if( field_of_name->type == FldForward )
-    {
-    // The target of ASSIGN TO was unquoted, but didn't resolve to a
-    // cbl_field_t.  This means that the name of the field is an
-    // environment variable that will hold the file name
-    psz = gg_define_char_star();
-    gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name)));
-    }
-  else
-    {
-    // The name is coming from a presumably FldAlphaNumeric variable
-    psz = get_string_from(field_of_name);
-    gg_call( CHAR_P,
-             "__gg__convert_encoding",
-             psz,
-             build_int_cst_type(INT,
-                                field_of_name->codeset.encoding),
-             build_int_cst_type(INT,
-                                DEFAULT_SOURCE_ENCODING),
-             NULL_TREE);
-    quoted_name = true;
-    }
-  return psz;
-  }
-
 void
 parser_file_open( struct cbl_file_t *file, int mode_char )
   {
@@ -10032,17 +9777,30 @@ parser_file_open( struct cbl_file_t *file, int mode_char )
     TRACE1_END
     }
 
-  bool quoted_name;
-  tree pszFilename = get_the_filename(quoted_name, file);
+  tree pszFilename = gg_define_char_star();
+  cbl_field_t *field_of_name = symbol_field_forward(file->filename);
+  if( field_of_name->type == FldForward )
+    {
+    // The target of ASSIGN TO was unquoted, but didn't resolve to a
+    // cbl_field_t.  This means that the name of the field is an
+    // environment variable that will hold the file name
+    gg_assign(pszFilename, gg_strdup(gg_string_literal(field_of_name->name)));
+    }
+  else
+    {
+    gg_assign(pszFilename, gg_cast(CHAR_P, null_pointer_node));
+    }
 
   sv_is_i_o = true;
   store_location_stuff("OPEN");
   gg_call(VOID,
           "__gg__file_open",
           gg_get_address_of(file->var_decl_node),
+          field_of_name->var_decl_node
+                  ? gg_get_address_of(field_of_name->var_decl_node)
+                  : null_pointer_node,
           pszFilename,
           build_int_cst_type(INT, mode_char),
-          quoted_name ? integer_one_node : integer_zero_node,
           NULL_TREE);
   set_user_status(file);
   }
@@ -10450,16 +10208,30 @@ parser_file_delete_file( cbl_label_t *name,
   tree there_was_an_error = gg_define_int(0);
   for(size_t i=0; i<filenames.size(); i++)
     {
-    bool quoted_name;
-    tree pszFilename = get_the_filename(quoted_name, filenames[i]);
+    tree pszFilename = gg_define_char_star();
+    cbl_field_t *field_of_name = symbol_field_forward(filenames[i]->filename);
+    if( field_of_name->type == FldForward )
+      {
+      // The target of ASSIGN TO was unquoted, but didn't resolve to a
+      // cbl_field_t.  This means that the name of the field is an
+      // environment variable that will hold the file name
+      gg_assign(pszFilename,
+                gg_strdup(gg_string_literal(field_of_name->name)));
+      }
+    else
+      {
+      gg_assign(pszFilename, gg_cast(CHAR_P, null_pointer_node));
+      }
     gg_assign(there_was_an_error,
               gg_bitwise_or(there_was_an_error,
                             gg_call_expr(
                             INT,
                             "__gg__file_remove",
                             gg_get_address_of(filenames[i]->var_decl_node),
+                            field_of_name->var_decl_node
+                              ? gg_get_address_of(field_of_name->var_decl_node)
+                              : null_pointer_node,
                             pszFilename,
-                            quoted_name ? integer_one_node : integer_zero_node,
                             NULL_TREE)));
     set_user_status(filenames[i]);
     }
@@ -10653,7 +10425,7 @@ parser_file_start(struct cbl_file_t *file,
       {
       size_t nfield = file->keys[key_number].fields[ifield];
       cbl_field_t *field = cbl_field_of(symbol_at(nfield));
-      combined_length += field->data.capacity;
+      combined_length += field->data.capacity();
       }
     gg_assign(length, build_int_cst_type(SIZE_T, combined_length));
     }
@@ -11151,7 +10923,12 @@ parser_inspect_conv(cbl_refer_t input,
   }
 
 void
-parser_module_name( cbl_field_t *tgt, module_type_t type )
+parser_intrinsic_find_string(cbl_field_t *tgt,
+                             const cbl_refer_t& haystack,
+                             const cbl_refer_t& needle,
+                             const cbl_refer_t *after,
+                             bool last,
+                             bool anycase)
   {
   SHOW_PARSE
     {
@@ -11159,33 +10936,82 @@ parser_module_name( cbl_field_t *tgt, module_type_t type )
     SHOW_PARSE_END
     }
   gg_call(VOID,
-          "__gg__module_name",
-          gg_get_address_of(tgt->var_decl_node),
-          build_int_cst_type(INT, type),
-          NULL_TREE);
+          "__gg__find_string",
+          gg_get_address_of( tgt->var_decl_node),
+          gg_get_address_of( haystack.field->var_decl_node),
+          refer_offset(      haystack),
+          refer_size_source( haystack),
+          gg_get_address_of( needle.field->var_decl_node),
+          refer_offset(      needle),
+          refer_size_source( needle),
+          after ? gg_get_address_of( after->field->var_decl_node)
+                : null_pointer_node,
+          after ? refer_offset(*after) : size_t_zero_node,
+          after ? refer_size_source(*after) : size_t_zero_node,
+          last   ?integer_one_node:integer_zero_node,
+          anycase?integer_one_node:integer_zero_node,
+          NULL_TREE
+          );
   }
 
 void
-parser_intrinsic_numval_c( cbl_field_t *f,
-                           cbl_refer_t& input,
-                           bool locale,
-                           cbl_refer_t& currency,
-                           bool anycase,
-                           bool test_numval_c ) // true for TEST-NUMVAL-C
+parser_intrinsic_convert(cbl_field_t *tgt,
+                         const cbl_refer_t& input,
+                         convert_type_t src_fmt,
+                         unsigned int   dst_fmt )
   {
-  Analyze();
   SHOW_PARSE
     {
     SHOW_PARSE_HEADER
     SHOW_PARSE_END
     }
-  TRACE1
-    {
-    TRACE1_HEADER
-    TRACE1_END
-    }
-  if( locale || anycase )
-    {
+  gg_call(VOID,
+          "__gg__convert",
+          gg_get_address_of(tgt->var_decl_node),
+          gg_get_address_of(input.field->var_decl_node),
+          refer_offset(input),
+          refer_size_source(input),
+          build_int_cst_type(INT, src_fmt),
+          build_int_cst_type(INT, dst_fmt),
+          NULL_TREE);
+  }
+
+void
+parser_module_name( cbl_field_t *tgt, module_type_t type )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_END
+    }
+  gg_call(VOID,
+          "__gg__module_name",
+          gg_get_address_of(tgt->var_decl_node),
+          build_int_cst_type(INT, type),
+          NULL_TREE);
+  }
+
+void
+parser_intrinsic_numval_c( cbl_field_t *f,
+                           cbl_refer_t& input,
+                           bool locale,
+                           cbl_refer_t& currency,
+                           bool anycase,
+                           bool test_numval_c ) // true for TEST-NUMVAL-C
+  {
+  Analyze();
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_END
+    }
+  TRACE1
+    {
+    TRACE1_HEADER
+    TRACE1_END
+    }
+  if( locale || anycase )
+    {
     gcc_unreachable();
     }
   if( test_numval_c )
@@ -11428,6 +11254,43 @@ parser_intrinsic_call_1( cbl_field_t *tgt,
       TRACE1_INDENT
       TRACE1_REFER("parameter: ", ref1, "")
       }
+
+    const charmap_t *charmap = __gg__get_charmap(ref1.field->codeset.encoding);
+    tree stride = gg_cast(LONG, integer_one_node);
+
+    switch(ref1.field->type)
+      {
+      case FldInvalid:
+      case FldGroup:
+      case FldAlphanumeric:
+      case FldNumericDisplay:
+      case FldNumericEdited:
+      case FldAlphaEdited:
+      case FldLiteralA:
+        stride = build_int_cst_type(LONG, charmap->stride());
+        break;
+
+      case FldNumericBinary:
+      case FldFloat:
+      case FldPacked:
+      case FldNumericBin5:
+      case FldLiteralN:
+      case FldClass:
+      case FldConditional:
+      case FldForward:
+      case FldIndex:
+      case FldSwitch:
+      case FldDisplay:
+      case FldPointer:
+        stride = gg_cast(LONG, integer_one_node);
+        break;
+      }
+
+    if( ref1.field->attr & hex_encoded_e )
+      {
+      stride = gg_cast(LONG, integer_one_node);
+      }
+
     size_t upper = ref1.field->occurs.bounds.upper
                                     ? ref1.field->occurs.bounds.upper : 1;
     if( ref1.nsubscript() )
@@ -11443,8 +11306,9 @@ parser_intrinsic_call_1( cbl_field_t *tgt,
               "__gg__int128_to_field",
               gg_get_address_of(tgt->var_decl_node),
               gg_cast(INT128,
-                      gg_multiply(refer_size_source(ref1),
-                                  depending_on)),
+                      gg_divide(gg_multiply(refer_size_source(ref1),
+                                            depending_on),
+                                stride)),
               integer_zero_node,
               build_int_cst_type(INT, truncation_e),
               null_pointer_node,
@@ -11458,7 +11322,8 @@ parser_intrinsic_call_1( cbl_field_t *tgt,
                 "__gg__int128_to_field",
                 gg_get_address_of(tgt->var_decl_node),
                 gg_cast(INT128,
-                        refer_size_source(ref1)),
+                        gg_divide(refer_size_source(ref1),
+                                  stride)),
                 integer_zero_node,
                 build_int_cst_type(INT, truncation_e),
                 null_pointer_node,
@@ -11470,8 +11335,9 @@ parser_intrinsic_call_1( cbl_field_t *tgt,
                 "__gg__int128_to_field",
                 gg_get_address_of(tgt->var_decl_node),
                 gg_cast(INT128,
-                        gg_multiply(refer_size_source(ref1),
-                                    build_int_cst_type(SIZE_T, upper))),
+                        gg_divide(gg_multiply(refer_size_source(ref1),
+                                            build_int_cst_type(LONG, upper)),
+                                  stride)),
                 integer_zero_node,
                 build_int_cst_type(INT, truncation_e),
                 null_pointer_node,
@@ -11500,6 +11366,11 @@ parser_intrinsic_call_1( cbl_field_t *tgt,
       TRACE1_INDENT
       TRACE1_REFER("parameter: ", ref1, "")
       }
+            gg_get_address_of(tgt->var_decl_node);
+            gg_get_address_of(ref1.field->var_decl_node);
+            refer_offset(ref1);
+            refer_size_source(ref1);
+
     gg_call(VOID,
             function_name,
             gg_get_address_of(tgt->var_decl_node),
@@ -13309,11 +13180,11 @@ create_and_call(size_t narg,
         {
         crv = by_content_e;
         gg_assign(location,
-                  gg_cast(UCHAR_P, build_string_literal(args[i].refer.field->data.capacity,
-                                       args[i].refer.field->data.initial)));
+                  gg_cast(UCHAR_P, build_string_literal(args[i].refer.field->data.capacity(),
+                                       args[i].refer.field->data.original())));
         gg_assign(length,
                   build_int_cst_type( SIZE_T,
-                                      args[i].refer.field->data.capacity));
+                                      args[i].refer.field->data.capacity()));
         }
       else
         {
@@ -13430,13 +13301,13 @@ create_and_call(size_t narg,
               {
               // All temporaries are SIZE_T
               if( args[i].refer.field->type == FldFloat
-                  && args[i].refer.field->data.capacity == 16 )
+                  && args[i].refer.field->data.capacity() == 16 )
                 {
                 as_int128 = true;
                 }
               else if(   args[i].refer.field->type == FldNumericBin5
                       && args[i].refer.field->data.digits   == 0
-                      && args[i].refer.field->data.capacity == 16 )
+                      && args[i].refer.field->data.capacity() == 16 )
                 {
                 as_int128 = true;
                 }
@@ -13543,46 +13414,27 @@ create_and_call(size_t narg,
     // We expect the return value to be a 64-bit or 128-bit integer.  How
     // we treat that returned value depends on the target.
 
-    // Pick up that value:
+    // Create a variable of the type expected from the called function
     returned_value = gg_define_variable(returned_value_type);
+
+    // Actually call the function, assigning the returned value to that
+    // variable:
     push_program_state();
     gg_assign(returned_value, gg_cast(returned_value_type, call_expr));
     pop_program_state();
 
+    // Now we decided what to do with the returned value, based on its type.
     if( returned_value_type == CHAR_P )
       {
-      tree returned_location = gg_define_uchar_star();
-      tree returned_length   = gg_define_size_t();
-      // we were given a returned::field, so find its location and length:
-      gg_assign(returned_location,
-                gg_add( member(returned.field->var_decl_node, "data"),
-                        refer_offset(returned)));
-      gg_assign(returned_length,
-                gg_cast(TREE_TYPE(returned_length), refer_size_dest(returned)));
-
-      // The returned value is a string of nbytes, which by specification
-      // has to be at least as long as the returned_length of the target:
-      IF( returned_value,
-          eq_op,
-          gg_cast(returned_value_type, null_pointer_node ) )
-        {
-        // Somebody was discourteous enough to return a NULL pointer
-        // We'll jam in spaces:
-        charmap_t *charmap = __gg__get_charmap(returned.field->codeset.encoding);
-        int dest_space = charmap->mapped_character(ascii_space);
-        gg_memset(  returned_location,
-                    char_nodes[(unsigned char)dest_space],
-                    returned_length );
-        }
-      ELSE
-        {
-        // There is a valid pointer.  Do the assignment.
-        move_tree(returned.field,
-                  refer_offset(returned),
-                  returned_value,
-                  integer_one_node);
-        }
-      ENDIF
+      // Let the library do the assignment of the 'char *returned_value' to the
+      // target 'refer returned'
+      gg_call(VOID,
+              "__gg__refer_from_psz",
+              gg_get_address_of(returned.field->var_decl_node),
+              refer_offset(returned),
+              refer_size_dest(returned),
+              returned_value,
+              NULL_TREE);
       TRACE1
         {
         TRACE1_HEADER
@@ -13643,6 +13495,8 @@ create_and_call(size_t narg,
       }
     else
       {
+      // Getting here should be impossible; it means we didn't anticipate
+      // the type of the returned value:
       cbl_internal_error(
             "%s: What in the name of Nero are we doing here?",
             __func__);
@@ -14716,7 +14570,7 @@ mh_identical(const cbl_refer_t &destref,
   // for a simple byte-for-byte copy of the data areas:
   bool moved = false;
   if(     destref.field->type          == sourceref.field->type
-      &&  destref.field->data.capacity == sourceref.field->data.capacity
+      &&  destref.field->data.capacity() == sourceref.field->data.capacity()
       &&  destref.field->data.digits   == sourceref.field->data.digits
       &&  destref.field->data.rdigits  == sourceref.field->data.rdigits
       &&       (destref.field->attr   & (signable_e|separate_e|leading_e))
@@ -14746,7 +14600,7 @@ mh_identical(const cbl_refer_t &destref,
                        refer_offset(destref)),
                 gg_add(member(sourceref.field->var_decl_node, "data"),
                        tsource.offset),
-                build_int_cst_type(SIZE_T, sourceref.field->data.capacity));
+                build_int_cst_type(SIZE_T, sourceref.field->data.capacity()));
       moved = true;
       }
     }
@@ -14776,21 +14630,23 @@ mh_source_is_literalN(cbl_refer_t &destref,
           }
 
         // We need the data sent to __gg__psz_to_alpha_move to be in the
-        // encoding of the destination
+        // encoding of the destination.  In accordance with the rules of
+        // cbl_field_t::internalize, the FldLiteralN is in source-code
+        // encoding, so we have to convert.
 
         size_t charsout;
         const char *converted = __gg__iconverter(
-                                         sourceref.field->codeset.encoding,
+                                         DEFAULT_SOURCE_ENCODING,
                                          destref.field->codeset.encoding,
-                                         sourceref.field->data.initial,
-                                         strlen(sourceref.field->data.initial),
+                                         sourceref.field->data.original(),
+                                         strlen(sourceref.field->data.original()),
                                          &charsout);
         gg_call(VOID,
                 "__gg__psz_to_alpha_move",
                 gg_get_address_of(destref.field->var_decl_node),
                 refer_offset(destref),
                 refer_size_dest(destref),
-                gg_string_literal(converted),
+                build_string_literal(charsout, converted),
                 build_int_cst_type(SIZE_T, charsout),
                 NULL_TREE);
         moved = true;
@@ -14807,7 +14663,7 @@ mh_source_is_literalN(cbl_refer_t &destref,
           SHOW_PARSE_TEXT("mh_source_is_literalN: pointer/index")
           }
 
-        if( sourceref.field->data.capacity < 8 )
+        if( sourceref.field->data.capacity() < 8 )
           {
           // There are too few bytes in sourceref
           if( sourceref.field->attr & signable_e )
@@ -14818,7 +14674,7 @@ mh_source_is_literalN(cbl_refer_t &destref,
                       gg_get_indirect_reference(gg_add(member(sourceref.field->var_decl_node,
                                                               "data"),
                                                 build_int_cst_type(SIZE_T,
-                                                                   sourceref.field->data.capacity-1)),
+                                                                   sourceref.field->data.capacity()-1)),
                       integer_zero_node));
             IF( gg_bitwise_and(highbyte, build_int_cst_type(UCHAR, 0x80)),
                 eq_op,
@@ -14852,7 +14708,7 @@ mh_source_is_literalN(cbl_refer_t &destref,
         gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"),
                                refer_offset(destref)),
                   gg_get_address_of(literalN_value),
-                  build_int_cst_type(SIZE_T, sourceref.field->data.capacity));
+                  build_int_cst_type(SIZE_T, sourceref.field->data.capacity()));
         moved = true;
 
         break;
@@ -14872,11 +14728,11 @@ mh_source_is_literalN(cbl_refer_t &destref,
         // For now, we are ignoring intermediates:
         assert( !(destref.field->attr & intermediate_e) );
 
-        int bytes_needed = std::max(destref.field->data.capacity,
-                                    sourceref.field->data.capacity);
+        int bytes_needed = std::max(destref.field->data.capacity(),
+                                    sourceref.field->data.capacity());
         tree calc_type = tree_type_from_size(bytes_needed,
                                             sourceref.field->attr & signable_e);
-        tree dest_type = tree_type_from_size( destref.field->data.capacity,
+        tree dest_type = tree_type_from_size( destref.field->data.capacity(),
                                               destref.field->attr & signable_e);
 
         // Pick up the source data.
@@ -14970,31 +14826,31 @@ mh_source_is_literalN(cbl_refer_t &destref,
           }
 
         // __gg__string_to_alpha_edited expects the source string to be in
-        // the same encoding as the target:
-        size_t len = strlen(sourceref.field->data.initial);
-        char *src =
-         static_cast<char *>(xmalloc(len+1));
-        memcpy( src,
-                sourceref.field->data.initial,
-                strlen(sourceref.field->data.initial));
+        // the same encoding as the target.  The rule in internalize is that
+        // a FldLiteralN::data.initial is left in source-code space, so it
+        // needs to be converted to the destination encoding.
         size_t charsout;
-        const char *converted = __gg__iconverter(
-                                            sourceref.field->codeset.encoding,
+        const char *converted_ = __gg__iconverter(
+                                            DEFAULT_SOURCE_ENCODING,
                                             destref.field->codeset.encoding,
-                                            src,
-                                            len,
+                                            sourceref.field->data.original(),
+                                            strlen(sourceref.field->data.original()),
                                             &charsout);
+        // Copy converted, because __gg__string_to_alpha_edited might have its
+        // own reasons to use charmap_t, which could mess up the static buffer
+        // used by __gg__iconverter:
+        char *converted = xstrdup(converted_);
         gg_call(VOID,
                 "__gg__string_to_alpha_edited",
                 gg_add( member(destref.field->var_decl_node, "data"),
                         refer_offset(destref) ),
                 build_int_cst_type(INT, destref.field->codeset.encoding),
                 gg_string_literal(converted),
-                build_int_cst_type(INT, len),
+                build_int_cst_type(INT, strlen(converted)),
                 gg_string_literal(destref.field->data.picture),
                 NULL_TREE);
-        free(src);
         moved = true;
+        free(converted);
         break;
         }
 
@@ -15002,13 +14858,8 @@ mh_source_is_literalN(cbl_refer_t &destref,
         {
         tree tdest = gg_add(member(destref.field->var_decl_node, "data"),
                             refer_offset(destref) );
-        switch( destref.field->data.capacity )
+        switch( destref.field->data.capacity() )
           {
-          // For some reason, using FLOAT128 in the build_pointer_type causes
-          // a SEGFAULT.  So, we'll use other types with equivalent sizes. I
-          // am speculating that the use of floating-point types causes the -O0
-          // compilation to move things using the mmx registers.  So, I am using
-          // intxx types in the hope that they are simpler.
           case 4:
             {
             // The following generated code is the exact equivalent
@@ -15069,7 +14920,7 @@ static tree
 float_type_of(const cbl_field_t *field)
   {
   gcc_assert(field->type == FldFloat);
-  return float_type_of(field->data.capacity);
+  return float_type_of(field->data.capacity());
   }
 
 static tree
@@ -15099,7 +14950,7 @@ mh_dest_is_float( cbl_refer_t &destref,
       case FldNumericEdited:
       case FldPacked:
         {
-        switch( destref.field->data.capacity )
+        switch( destref.field->data.capacity() )
           {
           case 4:
             gg_call(VOID,
@@ -15163,7 +15014,7 @@ mh_dest_is_float( cbl_refer_t &destref,
           // If the destination is bigger than the source, then we can
           // do an untested move:
 
-          if( destref.field->data.capacity >= sourceref.field->data.capacity )
+          if( destref.field->data.capacity() >= sourceref.field->data.capacity() )
             {
             tree dtype = float_type_of(&destref);
             tree stype = float_type_of(&sourceref);
@@ -15180,7 +15031,7 @@ mh_dest_is_float( cbl_refer_t &destref,
           else
             {
             // There are only three possible moves left:
-            if(destref.field->data.capacity == 8 )
+            if(destref.field->data.capacity() == 8 )
               {
               if( size_error )
                 {
@@ -15207,7 +15058,7 @@ mh_dest_is_float( cbl_refer_t &destref,
             else
               {
               // The destination has to be float32
-              if( sourceref.field->data.capacity == 8 )
+              if( sourceref.field->data.capacity() == 8 )
                 {
                 if( size_error )
                   {
@@ -15267,6 +15118,7 @@ mh_dest_is_float( cbl_refer_t &destref,
 
       case FldLiteralA:
       case FldAlphanumeric:
+      case FldGroup:
         {
         // Alphanumeric to float is inherently slow.  Send it off to the library
         break;
@@ -15337,11 +15189,19 @@ mh_numeric_display( const cbl_refer_t &destref,
   {
   bool moved = false;
 
+  charmap_t *charmap_source =
+                       __gg__get_charmap(sourceref.field->codeset.encoding);
   if(     destref.field->type   == FldNumericDisplay
       &&  sourceref.field->type == FldNumericDisplay
       &&  !(destref.field->attr   & scaled_e)
-      &&  !(sourceref.field->attr & scaled_e) )
+      &&  !(sourceref.field->attr & scaled_e)
+      &&  charmap_source->stride() == 1
+      &&  sourceref.field->codeset.encoding == destref.field->codeset.encoding
+      )
     {
+    // We can do simple moves of single-byte same-encoding numeric display.
+    // More complex ones get sent to __gg__move
+
     Analyze();
     // I believe that there are 450 pathways through the following code.
     // That's because there are five different valid combination of signable_e,
@@ -15367,10 +15227,8 @@ mh_numeric_display( const cbl_refer_t &destref,
     // nybble is 0xC0 for positive values, and 0xD0 for negative; all other
     // digits are 0x70.
 
-    charmap_t *charmap_source =
-                         __gg__get_charmap(sourceref.field->codeset.encoding);
     charmap_t *charmap_dest   =
-                         __gg__get_charmap(  destref.field->codeset.encoding);
+                       __gg__get_charmap(  destref.field->codeset.encoding);
 
     static tree source_sign_loc  = gg_define_variable(UCHAR_P,
                                                       "..mhnd_sign_loc",
@@ -15435,7 +15293,7 @@ mh_numeric_display( const cbl_refer_t &destref,
         gg_assign(source_sign_loc,
                   gg_add(source_p,
                         build_int_cst_type(SIZE_T,
-                                          sourceref.field->data.capacity-1)));
+                                          sourceref.field->data.capacity()-1)));
         break;
       case 5:
         //     signable, not leading,     separate
@@ -15443,7 +15301,7 @@ mh_numeric_display( const cbl_refer_t &destref,
         gg_assign(source_sign_loc,
                   gg_add(source_p,
                         build_int_cst_type(SIZE_T,
-                                          sourceref.field->data.capacity-1)));
+                                          sourceref.field->data.capacity()-1)));
         break;
       case 6:
         //     signable,     leading, not separate
@@ -15529,7 +15387,7 @@ mh_numeric_display( const cbl_refer_t &destref,
         gg_assign(dest_sign_loc,
                   gg_add(dest_p,
                         build_int_cst_type(SIZE_T,
-                                          destref.field->data.capacity-1)));
+                                          destref.field->data.capacity()-1)));
         break;
       case 5:
         //     signable, not leading,     separate
@@ -15537,7 +15395,7 @@ mh_numeric_display( const cbl_refer_t &destref,
         gg_assign(dest_sign_loc,
                   gg_add(dest_p,
                         build_int_cst_type(SIZE_T,
-                                          destref.field->data.capacity-1)));
+                                          destref.field->data.capacity()-1)));
         break;
       case 6:
         //     signable,     leading, not separate
@@ -15725,12 +15583,13 @@ mh_little_endian( const cbl_refer_t &destref,
   {
   bool moved = false;
 
-  cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial);
+  cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.original());
 
   if(     !figconst
       &&  !(destref.field->attr    & scaled_e)
       &&  !(destref.field->attr    & (intermediate_e  ))
       &&  !(sourceref.field->attr  & (intermediate_e  ))
+      &&  sourceref.field->type     != FldGroup
       &&  sourceref.field->type     != FldLiteralA
       &&  sourceref.field->type     != FldAlphanumeric
       &&  sourceref.field->type     != FldNumericEdited
@@ -15794,7 +15653,10 @@ mh_source_is_group( const cbl_refer_t &destref,
                     const TREEPLET    &tsrc)
   {
   bool retval = false;
-  if( sourceref.field->type == FldGroup && !(destref.field->attr & rjust_e) )
+ charmap_t *charmap = __gg__get_charmap(destref.field->codeset.encoding);
+  if(   sourceref.field->type == FldGroup && !(destref.field->attr & rjust_e)
+     && sourceref.field->codeset.encoding == destref.field->codeset.encoding
+     && charmap->stride() == 1)
     {
     Analyze();
     // We are moving a group to a something.  The rule here is just move as
@@ -15814,7 +15676,6 @@ mh_source_is_group( const cbl_refer_t &destref,
     ELSE
       {
       // There are too-few source bytes:
-      charmap_t *charmap = __gg__get_charmap(destref.field->codeset.encoding);
       int dest_space = charmap->mapped_character(ascii_space);
       gg_memset(tdest, build_int_cst_type(INT, dest_space), dbytes);
       gg_memcpy(tdest, tsource, sbytes);
@@ -15840,8 +15701,8 @@ mh_source_is_literalA(const cbl_refer_t &destref,
     // construct a string with the same number of characters as the source, but
     // in the target variable's encoding.
 
-    // We will then call a library routine that will be in charge of trimming
-    // and space filling.
+    // We will then call a library routine that will be in charge of run-time
+    // trimming or space filling, as necessary.
 
     cbl_encoding_t encoding_dest =   destref.field->codeset.encoding;
     charmap_t *charmap_dest = __gg__get_charmap(encoding_dest);
@@ -15855,16 +15716,35 @@ mh_source_is_literalA(const cbl_refer_t &destref,
 
     static char *buffer = NULL;
     static size_t buffer_size = 0;
-    size_t source_length = sourceref.field->data.capacity;
+    size_t source_length;
+    size_t dest_length;
+    if( sourceref.field->attr & hex_encoded_e )
+      {
+      // Hex-encoded data is moved as-is
+      source_length = sourceref.field->data.capacity();
+      dest_length   = std::min(source_length,
+                          static_cast<size_t>(destref.field->data.capacity()));
+      }
+    else
+      {
+      // Otherwise, data.initial prevails:
+      size_t source_based_on_strlen = strlen(sourceref.field->data.original());
+      size_t source_based_on_capacity = sourceref.field->data.capacity() /
+                                        sourceref.field->codeset.stride() ;
+      source_length = std::max( source_based_on_strlen ,
+                                source_based_on_capacity );
+      dest_length   = source_length * charmap_dest->stride();
+      }
 
-    if( buffer_size < source_length )
+    if( buffer_size < dest_length )
       {
-      buffer_size = source_length;
-      buffer = static_cast<char *>(xrealloc(buffer, source_length));
+      buffer_size = dest_length;
+      buffer = static_cast<char *>(xrealloc(buffer, buffer_size));
       }
     gcc_assert(buffer);
 
-    cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial);
+    cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.original());
+    size_t outlength;
     if( figconst )
       {
       // We are going to fill 'buffer' with a solid run of the figurative
@@ -15875,7 +15755,7 @@ mh_source_is_literalA(const cbl_refer_t &destref,
         {
         case normal_value_e :
           // This is not possible, it says here in the fine print.
-          abort();
+          gcc_unreachable();
           break;
         case low_value_e    :
           const_char = charmap_dest->low_value_character();
@@ -15900,27 +15780,39 @@ mh_source_is_literalA(const cbl_refer_t &destref,
       }
      else
       {
-      // We are going to convert the source string to the destination codeset,
-      // and then copy it to 'buffer', trimming if necessary, and space-filling
-      // to the right if necessary:
-      cbl_encoding_t encoding_src  = sourceref.field->codeset.encoding;
-
-      size_t outlength;
-      const char *source_string = __gg__iconverter( encoding_src,
-                                                 encoding_dest,
-                                                 sourceref.field->data.initial,
-                                                 source_length,
-                                                 &outlength );
-      // Copy over the converted string
-      memcpy( buffer,
-              source_string,
-              outlength );
+      if( sourceref.field->attr & hex_encoded_e )
+        {
+        // hex_encoded data goes as is:
+        memcpy(buffer, sourceref.field->data.original(), dest_length);
+        outlength = dest_length;
+        }
+      else
+        {
+        // We are going to convert the source string to the destination
+        // codeset, and then copy it to 'buffer', trimming if necessary, and
+        // space-filling to the right if necessary:
+        const char *source_string =
+        __gg__iconverter(
+                       sourceref.field->codeset.default_encodings.source->type,
+                       encoding_dest,
+                       sourceref.field->data.original(),
+                       source_length,
+                       &outlength );
+        if( outlength > dest_length )
+          {
+          outlength = dest_length;
+          }
+        // Copy over the converted string
+        memcpy( buffer,
+                source_string,
+                outlength );
+        }
       }
 
     // If the source is flagged ALL, or if we are setting the destination to
     // a figurative constant, pass along the ALL bit:
     int rounded_parameter = rounded
-                            | ((sourceref.all || figconst ) ? REFER_ALL_BIT : 0);
+                           | ((sourceref.all || figconst ) ? REFER_ALL_BIT : 0);
 
     if( size_error )
       {
@@ -15931,9 +15823,9 @@ mh_source_is_literalA(const cbl_refer_t &destref,
                               refer_offset(destref),
                               refer_size_dest(destref),
                               build_int_cst_type(INT, rounded_parameter),
-                              build_string_literal(source_length,
+                              build_string_literal(outlength,
                                                    buffer),
-                              build_int_cst_type( SIZE_T, source_length),
+                              build_int_cst_type( SIZE_T, outlength),
                               NULL_TREE));
       }
     else
@@ -15944,9 +15836,9 @@ mh_source_is_literalA(const cbl_refer_t &destref,
                               refer_offset(destref),
                               refer_size_dest(destref),
                               build_int_cst_type(INT, rounded_parameter),
-                              build_string_literal(source_length,
+                              build_string_literal(outlength,
                                                    buffer),
-                              build_int_cst_type( SIZE_T, source_length),
+                              build_int_cst_type( SIZE_T, outlength),
                               NULL_TREE);
       }
     if(    destref.refmod.from
@@ -16000,9 +15892,9 @@ move_helper(tree size_error,        // This is an INT
       first_time = false;
       gg_assign(stash, gg_cast(UCHAR_P, gg_malloc(stash_size)));
       }
-    if( stash_size < destref.field->data.capacity )
+    if( stash_size < destref.field->data.capacity() )
       {
-      stash_size = destref.field->data.capacity;
+      stash_size = destref.field->data.capacity();
       gg_assign(stash, gg_cast(UCHAR_P, gg_realloc(stash, stash_size)));
       }
     st_data = qualified_data_location(destref);
@@ -16063,7 +15955,7 @@ move_helper(tree size_error,        // This is an INT
                               size_error);
     }
 
-  if( !moved && sourceref.field->type == FldLiteralA)
+  if( !moved )
     {
     moved = mh_source_is_literalA(destref,
                                   sourceref,
@@ -16218,596 +16110,30 @@ real_powi10 (uint32_t x)
   return pow10;
 }
 
-static
-char *
-binary_initial(cbl_field_t *field)
-  {
-  // This routine returns an xmalloced buffer designed to replace the
-  // data.initial member of the incoming field
-  char *retval = NULL;
-
-  uint32_t capacity;
-  uint32_t ddigits;
-  int32_t  drdigits;
-  uint64_t attr;
-  FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
-                                                 capacity,
-                                                 ddigits,
-                                                 drdigits,
-                                                 attr);
-  int scaled_rdigits = get_scaled_rdigits(field);
-
-  int i = field->data.rdigits;
-  while( i<0 )
-    {
-    value128 = value128/10;
-    i += 1;
-    }
-
-  // We take the digits of value128, and put them into ach.  We line up
-  // the rdigits, and we truncate the string after desired_digits
-  while(drdigits < scaled_rdigits)
-    {
-    value128 *= 10;
-    drdigits += 1;
-    }
-  while(drdigits > scaled_rdigits)
-    {
-    value128 = value128 / 10;
-    drdigits -= 1;
-    }
-
-  retval = static_cast<char *>(xmalloc(field->data.capacity));
-  gcc_assert(retval);
-  switch(field->data.capacity)
-    {
-    tree type;
-    case 1:
-    case 2:
-    case 4:
-    case 8:
-    case 16:
-      type = build_nonstandard_integer_type ( field->data.capacity
-                                              * BITS_PER_UNIT, 0);
-      native_encode_wide_int (type, value128, PTRCAST(unsigned char, retval),
-                              field->data.capacity);
-      break;
-    default:
-      fprintf(stderr,
-              "Trouble in binary_initial at %s() %s:%d\n",
-              __func__,
-              __FILE__,
-              __LINE__);
-      abort();
-      break;
-    }
-
-  return retval;
-  }
-
-static void
-digits_from_int128( char                *ach,
-                    cbl_field_t         *field,
-                    uint32_t             desired_digits,
-                    FIXED_WIDE_INT(128)  value128, // cppcheck-suppress unknownMacro
-                    int32_t              rdigits)
-  {
-  if( value128 < 0 )
-    {
-    value128 = -value128;
-    }
-
-  // 'rdigits' are the number of rdigits in value128.
-
-  int scaled_rdigits = get_scaled_rdigits(field);
-
-  int i = field->data.rdigits;
-  while( i<0 )
-    {
-    value128 = value128/10;
-    i += 1;
-    }
-
-  // We take the digits of value128, and put them into ach.  We line up
-  // the rdigits, and we truncate the string after desired_digits
-  while(rdigits < scaled_rdigits)
-    {
-    value128 *= 10;
-    rdigits += 1;
-    }
-  while(rdigits > scaled_rdigits)
-    {
-    value128 = value128 / 10;
-    rdigits -= 1;
-    }
-  char conv[128];
-  print_dec (value128, conv, SIGNED);
-  size_t len = strlen(conv);
-
-  if( len<desired_digits )
-    {
-    memset(ach, ascii_0, desired_digits - len);
-    strcpy(ach+desired_digits - len, conv);
-    }
-  else
-    {
-    strcpy(ach, conv + len-desired_digits);
-    }
-  }
-
-#if 0
-// This routine was replaced with digits_from_int1289.  However, I am choosing
-// to keep it around for a while, because it is a master class in manipulating
-// REAL_VALUE_TYPE and FIXED_WIDE_INT
-
-static void
-digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits, REAL_VALUE_TYPE value)
-  {
-  char ach[128];
-
-  // We need to adjust value so that it has no decimal places
-  if( rdigits )
-    {
-      REAL_VALUE_TYPE pow10 = real_powi10 (rdigits);
-      real_arithmetic (&value, MULT_EXPR, &value, &pow10);
-    }
-  // We need to make sure that the resulting string will fit into
-  // a number with 'digits' digits
-  REAL_VALUE_TYPE pow10 = real_powi10 (field->data.digits);
-  mpfr_t m0, m1;
-
-  mpfr_inits2 (FLOAT_MODE_FORMAT (TYPE_MODE (float128_type_node))->p, m0, m1,
-               NULL);
-  mpfr_from_real (m0, &value, MPFR_RNDN);
-  mpfr_from_real (m1, &pow10, MPFR_RNDN);
-  mpfr_clear_flags ();
-  mpfr_fmod (m0, m0, m1, MPFR_RNDN);
-  real_from_mpfr (&value, m0,
-                  REAL_MODE_FORMAT (TYPE_MODE (float128_type_node)),
-                  MPFR_RNDN);
-  real_convert (&value, TYPE_MODE (float128_type_node), &value);
-  mpfr_clears (m0, m1, NULL);
-  real_roundeven (&value, TYPE_MODE (float128_type_node), &value);
-
-  bool fail = false;
-  FIXED_WIDE_INT(128) i
-    = FIXED_WIDE_INT(128)::from (real_to_integer (&value, &fail, 128), SIGNED);
-
-  // We convert it to a integer string of digits:
-  print_dec (i, ach, SIGNED);
-
-  gcc_assert( strlen(ach) <= field->data.digits );
-  if( strlen(ach) < width )
-    {
-    memset(retval, '0', width-strlen(ach) );
-    }
-  strcpy(retval + (width-strlen(ach)), ach);
-  }
-#endif
-
-static char *
-initial_from_initial(cbl_field_t *field)
+static tree
+convert_data_initial(cbl_field_t * field)
   {
-  Analyze();
-  // This routine returns an xmalloced buffer that is intended to replace the
-  // data.initial member of the incoming field.
-
-  //fprintf(stderr, " %s\n", field->name);
-
-  char *retval = NULL;
-
-  // Let's handle the possibility of a figurative constant
-  cbl_figconst_t figconst = cbl_figconst_of(field->data.initial);
-  if( figconst )
-    {
-    charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
-    int const_char = charmap->figconst_character(figconst);
-    bool set_return = figconst != zero_value_e;
-    if( !set_return )
-      {
-      // The figconst is zero
-      switch(field->type)
-        {
-        case FldGroup:
-        case FldAlphanumeric:
-          set_return = true;
-          break;
-
-        default:
-          break;
-        }
-      }
-    if( set_return )
-      {
-      retval = static_cast<char *>(xmalloc(field->data.capacity+1));
-      gcc_assert(retval);
-      memset(retval, const_char, field->data.capacity);
-      retval[field->data.capacity] = '\0';
-      return retval;
-      }
-    }
-
-  // ???  Refactoring the cases below that do not need 'value' would
-  // make this less ugly
-  REAL_VALUE_TYPE value;
-  if( field->data.etc_type == cbl_field_data_t::value_e )
-    value = TREE_REAL_CST (field->data.value_of ());
-
-#if 0
-  int rdigits;
-  // There is always the infuriating possibility of a P-scaled number
-  if( field->attr & scaled_e )
-    {
-    rdigits = 0;
-    if( field->data.rdigits >= 0 )
-      {
-      // Suppose our PIC is PPPPPP999, meaning that field->digits
-      // is 3, and field->rdigits is 6.
-
-      // Our result has no decimal places, and we have to multiply the value
-      // by 10**9 to get the significant bdigits where they belong.
-
-      REAL_VALUE_TYPE pow10
-        = real_powi10 (field->data.digits + field->data.rdigits);
-      real_arithmetic (&value, MULT_EXPR, &value, &pow10);
-      }
-    else
-      {
-      // Suppose our target is 999PPPPPP, so there is a ->digits
-      // of 3 and field->rdigits of -6.
-
-      // If our caller gave us 123000000, we need to divide
-      // it by 1000000 to line up the 123 with where we want it to go:
-
-      REAL_VALUE_TYPE pow10 = real_powi10 (-field->data.rdigits);
-      real_arithmetic (&value, RDIV_EXPR, &value, &pow10);
-      }
-    // Either way, we now have everything aligned for the remainder of the
-    // processing to work:
-    }
-  else
-    {
-    // Not P-scaled
-    rdigits = field->data.rdigits;
-    }
-#endif
-
-  switch(field->type)
-    {
-    case FldNumericBin5:
-    case FldIndex:
-      retval = binary_initial(field);
-      break;
-
-    case FldNumericBinary:
-      {
-      retval = binary_initial(field);
-      size_t left = 0;
-      size_t right = field->data.capacity - 1;
-      while(left < right)
-        {
-        std::swap(retval[left++], retval[right--]);
-        }
-      break;
-      }
-
-    case FldNumericDisplay:
-      {
-      charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
-
-      retval = static_cast<char *>(xmalloc(field->data.capacity));
-      gcc_assert(retval);
-      char *pretval = retval;
-      char ach[128];
-
-      bool negative;
-      if( real_isneg (&value) )
-        {
-        negative = true;
-        value = real_value_negate (&value);
-        }
-      else
-        {
-        negative = false;
-        }
-
-      // Convert the data.initial to a __int128
-      uint32_t capacity;
-      uint32_t ddigits;
-      int32_t  drdigits;
-      uint64_t attr;
-      FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
-                                                     capacity,
-                                                     ddigits,
-                                                     drdigits,
-                                                     attr);
-      digits_from_int128(ach, field, field->data.digits, value128, drdigits);
-
-      const char *digits = ach;
-      if(    (field->attr & signable_e)
-          && (field->attr & separate_e)
-          && (field->attr & leading_e ) )
-        {
-        // This zoned decimal value is signable, separate, and leading.
-        if( negative )
-          {
-          *pretval++ = charmap->mapped_character(ascii_minus);
-          }
-        else
-          {
-          *pretval++ = charmap->mapped_character(ascii_plus);
-          }
-        }
-      for(size_t i=0; i<field->data.digits; i++)
-        {
-        // Start by assuming it's an value that can't be signed
-        *pretval++ = charmap->mapped_character(ascii_0) + ((*digits++) & 0x0F);
-        }
-      if(     (field->attr & signable_e)
-          &&  (field->attr & separate_e)
-          && !(field->attr & leading_e ) )
-        {
-        // The value is signable, separate, and trailing
-        if( negative )
-          {
-          *pretval++ = charmap->mapped_character(ascii_minus);
-          }
-        else
-          {
-          *pretval++ = charmap->mapped_character(ascii_plus);
-          }
-        }
-      if(     (field->attr & signable_e)
-          && !(field->attr & separate_e) )
-        {
-        // This value is signable, and not separate.  So, the sign information
-        // goes into the first or last byte:
-        char *sign_location = field->attr & leading_e ?
-                              retval : retval + field->data.digits - 1 ;
-        *sign_location = charmap->set_digit_negative(*sign_location,
-                                                      negative);
-        }
-      break;
-      }
-
-    case FldPacked:
-      {
-      retval = static_cast<char *>(xmalloc(field->data.capacity));
-      gcc_assert(retval);
-      char *pretval = retval;
-      char ach[128];
-
-      bool negative;
-      if( real_isneg (&value) )
-        {
-          negative = true;
-          value = real_value_negate (&value);
-        }
-      else
-        {
-          negative = false;
-        }
-
-      // For COMP-6 (flagged by separate_e), the number of required digits is
-      // twice the capacity.
-
-      // For COMP-3, the number of digits is 2*capacity minus 1, because the
-      // the final "digit" is a sign nybble.
-
-      size_t ndigits =   (field->attr & separate_e)
-                       ? field->data.capacity * 2
-                       : field->data.capacity * 2 - 1;
-      uint32_t capacity;
-      uint32_t ddigits;
-      int32_t  drdigits;
-      uint64_t attr;
-      FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
-                                                     capacity,
-                                                     ddigits,
-                                                     drdigits,
-                                                     attr);
-      digits_from_int128(ach, field, ndigits, value128, drdigits);
-
-      const char *digits = ach;
-      for(size_t i=0; i<ndigits; i++)
-        {
-        if( !(i & 0x01) )
-          {
-          *pretval    = ((*digits++) & 0x0F)<<4;;
-          }
-        else
-          {
-          *pretval++ += (*digits++) & 0x0F;
-          }
-        }
-      if( !(field->attr & separate_e) )
-        {
-        // This is COMP-3, so put in a sign nybble
-        if( (field->attr & signable_e) )
-          {
-          if( negative )
-            {
-            *pretval++ += 0x0D;   // Means signable and negative
-            }
-          else
-            {
-            *pretval++ += 0x0C;   // Means signable and non-negative
-            }
-          }
-        else
-          {
-          *pretval++ += 0x0F;     // Means not signable
-          }
-        }
-      break;
-      }
+  // This routine returns a tree from field->data.initial, extended with
+  // a NUL on the end.
+  size_t buffer_size = field->data.capacity() + field->codeset.stride();
+  char *buffer = static_cast<char *>(xmalloc(buffer_size));
+  gcc_assert(buffer);
 
-    case FldGroup:
-    case FldAlphanumeric:
-    case FldLiteralA:
-    case FldAlphaEdited:
-      {
-      if( field->data.initial )
-        {
-        retval = static_cast<char *>(xmalloc(field->data.capacity+1));
-        gcc_assert(retval);
-        if( field->attr & hex_encoded_e)
-          {
-          memcpy(retval, field->data.initial, field->data.capacity);
-          }
-        else
-          {
-          size_t length = field->data.capacity;
-          memcpy(retval, field->data.initial, length);
-          if( strlen(field->data.initial) < length )
-            {
-            // If this is true, then the initial string must've been Z'xyz'
-            retval[strlen(field->data.initial)] = '\0';
-            }
-          }
-        retval[field->data.capacity] = '\0';
-        }
-      break;
-      }
+  size_t nbytes = field->data.capacity();
 
-    case FldNumericEdited:
-      {
-      charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
-      retval = static_cast<char *>(xmalloc(field->data.capacity+1));
-      gcc_assert(retval);
-      if( field->data.initial && field->attr & quoted_e )
-        {
-        // What the programmer says the value is, the value becomes, no
-        // matter how wrong it might be.
-        size_t length = std::min( (size_t)field->data.capacity,
-                                  strlen(field->data.initial));
-        for(size_t i=0; i<length; i++)
-          {
-          retval[i] = field->data.initial[i];
-          }
-        if( length < (size_t)field->data.capacity )
-          {
-          memset( retval+length,
-                  charmap->mapped_character(ascii_space),
-                  (size_t)field->data.capacity - length);
-          }
-        }
-      else
-        {
-        // It's not a quoted string, so we use data.value:
-        bool negative;
-        if( real_isneg (&value) )
-          {
-          negative = true;
-          value = real_value_negate (&value);
-          }
-        else
-          {
-          negative = false;
-          }
+  const char *converted = field->data.initial;
 
-        char ach[128];
-        memset(ach, 0, sizeof(ach));
-        memset(retval, 0, field->data.capacity);
+  // Copy the converted bytes
+  gcc_assert(nbytes < buffer_size);
+  memcpy(buffer, converted, nbytes);
+  charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
 
-        if( (field->attr & blank_zero_e) && real_iszero (&value) )
-          {
-          memset( retval,
-                  charmap->mapped_character(ascii_space),
-                  field->data.capacity);
-          }
-        else
-          {
-          size_t ndigits = field->data.capacity;
-          uint32_t capacity;
-          uint32_t ddigits;
-          int32_t  drdigits;
-          uint64_t attr;
-          FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
-                                                         capacity,
-                                                         ddigits,
-                                                         drdigits,
-                                                         attr);
-          digits_from_int128(ach, field, ndigits, value128, drdigits);
-
-          // __gg__string_to_numeric_edited operates in ASCII space:
-          __gg__string_to_numeric_edited( retval,
-                                          ach,
-                                          field->data.rdigits,
-                                          negative,
-                                          field->data.picture);
-          // So now we convert it to the target encoding:
-          size_t nbytes;
-          const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
-                                                   field->codeset.encoding,
-                                                   retval,
-                                                   strlen(retval),
-                                                   &nbytes);
-          strcpy(retval, converted);
-          }
-        }
-      break;
-      }
+  // Tack on a final NUL
+  charmap->putch(0, buffer, nbytes);
 
-    case FldFloat:
-      {
-      retval = static_cast<char *>(xmalloc(field->data.capacity));
-      gcc_assert(retval);
-      switch( field->data.capacity )
-        {
-        case 4:
-          value = real_value_truncate (TYPE_MODE (FLOAT), value);
-          native_encode_real (SCALAR_FLOAT_TYPE_MODE (FLOAT), &value,
-                              PTRCAST(unsigned char, retval), 4, 0);
-          break;
-        case 8:
-          value = real_value_truncate (TYPE_MODE (DOUBLE), value);
-          native_encode_real (SCALAR_FLOAT_TYPE_MODE (DOUBLE), &value,
-                              PTRCAST(unsigned char, retval), 8, 0);
-          break;
-        case 16:
-          value = real_value_truncate (TYPE_MODE (FLOAT128), value);
-          native_encode_real (SCALAR_FLOAT_TYPE_MODE (FLOAT128), &value,
-                              PTRCAST(unsigned char, retval), 16, 0);
-          break;
-        }
-      break;
-      }
-
-    case FldLiteralN:
-      {
-      // This requires annotation.
-
-      // The compiler originally used ASCII for field->data.initial. Later we
-      // expanded the field with the addition of the codeset.encoding
-      // For consistency in the parser processing, the FldLiteralN is arriving
-      // with the Object-Computer's character encoding, and field->data.initial
-      // is showing up encoded.
-
-      // But on the run-time side, if the initial string is needed, it is
-      // invariably more useful in ASCII.  Consider converting that string to
-      // a floating-point value, for example.
-
-      // So, we are going to convert the data.initial string back to ASCII
-      // here.  Later on, when we establish the run-time encoding, we will
-      // check for FldLiteralN and set that to ASCII as well. See
-      // actually_create_the_static_field().
-
-      size_t nbytes;
-      const char *converted = __gg__iconverter(field->codeset.encoding,
-                                               DEFAULT_SOURCE_ENCODING,
-                                               field->data.initial,
-                                               strlen(field->data.initial),
-                                               &nbytes);
-      retval = static_cast<char *>(xmalloc(strlen(field->data.initial)+1));
-      gcc_assert(retval);
-      strcpy(retval, converted);
-      break;
-      }
-
-    default:
-      break;
-    }
+  tree retval = build_string_literal( buffer_size,
+                                      buffer);
+  free(buffer);
   return retval;
   }
 
@@ -16839,7 +16165,7 @@ actually_create_the_static_field( cbl_field_t *new_var,
   CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
                           next_field,
                           build_int_cst_type( SIZE_T,
-                                              new_var->data.capacity) );
+                                              new_var->data.capacity()) );
   next_field = TREE_CHAIN(next_field);
 
   //  SIZE_T,  "allocated",
@@ -16848,7 +16174,7 @@ actually_create_the_static_field( cbl_field_t *new_var,
     CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
                             next_field,
                             build_int_cst_type( SIZE_T,
-                                                new_var->data.capacity) );
+                                                new_var->data.capacity()) );
     }
   else
     {
@@ -16861,24 +16187,9 @@ actually_create_the_static_field( cbl_field_t *new_var,
   next_field = TREE_CHAIN(next_field);
 
   //  SIZE_T,  "offset",
-
-  if( new_var->type == FldAlphanumeric &&
-      new_var->attr & intermediate_e )
-    {
-    // This is in support of FUNCTION TRIM.  That function can make the capacity
-    // of the intermediate target smaller so that TRIM("abc   ") returns
-    // "abc".  By putting the capacity here for such variables, we have a
-    // mechanism for restoring it the capacity to the original.
-    CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
-                            next_field,
-                            build_int_cst_type(SIZE_T, new_var->data.capacity));
-    }
-  else
-    {
-    CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
+  CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
                             next_field,
                             build_int_cst_type(SIZE_T, new_var->offset) );
-    }
 
   next_field = TREE_CHAIN(next_field);
 
@@ -16895,7 +16206,7 @@ actually_create_the_static_field( cbl_field_t *new_var,
   next_field = TREE_CHAIN(next_field);
 
   //  CHAR_P,  "initial",
-  if( length_of_initial_string == 0 )
+  if( length_of_initial_string == 0 || !new_var->data.has_initial_value() )
     {
     CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
                             next_field,
@@ -16960,12 +16271,25 @@ actually_create_the_static_field( cbl_field_t *new_var,
   //  INT,     "encoding",
   //  For FldLiteralN we force the encoding to be ASCII.
   //  See initial_from_initial() for an explanation.
+  //  For FldClass, we force the encoding to be UTF32; see
+
+  cbl_encoding_t encoding;
+  if( new_var->type == FldLiteralN )
+    {
+    encoding = new_var->codeset.default_encodings.source->type;
+    }
+  else if( new_var->type == FldClass )
+    {
+    encoding = DEFAULT_32_ENCODING;
+    }
+  else
+    {
+    encoding = new_var->codeset.encoding;
+    }
+
   CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr),
                         next_field,
-                        build_int_cst_type(INT,
-                                           new_var->type == FldLiteralN ?
-                                                DEFAULT_SOURCE_ENCODING
-                                              : new_var->codeset.encoding));
+                        build_int_cst_type(INT, encoding));
   next_field = TREE_CHAIN(next_field);
 
   //  INT,     "alphabet",
@@ -17063,7 +16387,7 @@ psa_new_var_decl(cbl_field_t *new_var, const char *external_record_base)
     // to do a linear search of the symbol table for each symbol
 
     if(   !our_index
-          && new_var->type != FldLiteralN
+          && ! new_var->is_numeric_constant()
           && !(new_var->attr & intermediate_e))
       {
       our_index = field_index(new_var);
@@ -17164,7 +16488,6 @@ psa_new_var_decl(cbl_field_t *new_var, const char *external_record_base)
   return new_var_decl;
   }
 
-#if 1
 static void
 psa_FldLiteralA(struct cbl_field_t *field )
   {
@@ -17183,92 +16506,32 @@ psa_FldLiteralA(struct cbl_field_t *field )
     }
 
   // We are constructing a completely static constant structure.  We know the
-  // capacity.  We'll create it from the data.initial.  The cblc_field_t:data
-  // will be a copy of the .initial data. The var_decl_node will be an ordinary
-  // cblc_field_t, which means that at this point in time, a FldLiteralA can be
-  // used anywhere a FldGroup or FldAlphanumeric can be used.  We are counting
-  // on the parser not allowing a FldLiteralA to be a left-hand-side variable.
-
-  // First make room
-  static size_t buffer_size = 1024;
-  static char *buffer = static_cast<char *>(xmalloc(buffer_size));
-  if( buffer_size < field->data.capacity+1 )
-    {
-    buffer_size = field->data.capacity+1;
-    buffer = static_cast<char *>(xrealloc(buffer, buffer_size));
-    }
-  gcc_assert(buffer);
-
-  cbl_figconst_t figconst = cbl_figconst_of( field->data.initial );
-  gcc_assert(figconst == normal_value_e);
-
-  memcpy(buffer, field->data.initial, field->data.capacity);
-  buffer[field->data.capacity] = '\0';
+  // capacity.  We'll create it from the data.initial.
 
-  // We have the original nul-terminated text at data.initial.  We have a
-  // copy of it in buffer[] in the internal codeset.
+  tree converted = convert_data_initial(field);
 
   static const char name_base[] = "_literal_a_";
 
-  // We will reuse a single static structure for each string
-  static std::unordered_map<std::string, int> seen_before;
-
-  std::string field_string(buffer);
-
-#if 0
-  /*  This code is suppoed to re-use literals, and seems to work just fine in
-      x86_64-linux and on an Apple aarch64 M1 Macbook Pro.  But on an M1
-      mini, using -Os optimization, attempts were made in the generated
-      assembly language to define _literal_a_1 more than once.
-
-      I didn't know how to try to track this one down, so I decided simply to
-      punt by removing the code.
+  static int nvar = 0;
+  nvar += 1;
 
-      I am leaving the code here because of a conviction that it someday should
-      be tracked down. */
-
-  std::unordered_map<std::string, int>::const_iterator it =
-              seen_before.find(field_string);
-
-  if( it != seen_before.end() )
-    {
-    // We've seen that string before.
-    int nvar = it->second;
-    char ach[32];
-    sprintf(ach, "%s%d", name_base, nvar);
-    field->var_decl_node = gg_declare_variable(cblc_field_type_node,
-                                                  ach,
-                                                  NULL,
-                                                  vs_file_static);
-    }
-  else
-#endif
-    {
-    // We have not seen that string before
-    static int nvar = 0;
-    nvar += 1;
-    seen_before[field_string] = nvar;
-
-    char ach[32];
-    sprintf(ach, "%s%d", name_base, nvar);
-    field->var_decl_node  = gg_define_variable( cblc_field_type_node,
-                                                ach,
-                                                vs_file_static);
-    actually_create_the_static_field(
-                field,
-                build_string_literal(field->data.capacity,
-                                     buffer),
-                field->data.capacity,
-                field->data.initial,
-                NULL_TREE,
-                field->var_decl_node);
-    TREE_READONLY(field->var_decl_node) = 1;
-    TREE_USED(field->var_decl_node) = 1;
-    TREE_STATIC(field->var_decl_node) = 1;
-    DECL_PRESERVE_P (field->var_decl_node) = 1;
-    }
+  char ach[32];
+  sprintf(ach, "%s%d", name_base, nvar);
+  field->var_decl_node  = gg_define_variable( cblc_field_type_node,
+                                              ach,
+                                              vs_file_static);
+  actually_create_the_static_field(
+              field,
+              converted,
+              strlen(field->data.original())+1,
+              field->data.original(),
+              NULL_TREE,
+              field->var_decl_node);
+  TREE_READONLY(field->var_decl_node) = 1;
+  TREE_USED(field->var_decl_node) = 1;
+  TREE_STATIC(field->var_decl_node) = 1;
+  DECL_PRESERVE_P (field->var_decl_node) = 1;
   }
-#endif
 
 void
 parser_local_add(struct cbl_field_t *new_var )
@@ -17297,7 +16560,7 @@ parser_local_add(struct cbl_field_t *new_var )
   if( new_var->level == LEVEL01 || new_var->level == LEVEL77)
     {
     // We need to allocate memory on the stack for this variable
-    tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity);
+    tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity());
     tree data_decl_node = gg_define_variable( array_type,
                                                     NULL,
                                                     vs_stack);
@@ -17332,37 +16595,37 @@ parser_symbol_add(struct cbl_field_t *new_var )
   Analyze();
   SHOW_PARSE
     {
-    do
-      {
-      fprintf(stderr, "( %d ) %s:", CURRENT_LINE_NUMBER, __func__);
-      }
-    while(0);
+    char ach[1024];
+    SHOW_PARSE_HEADER
 
-    fprintf(stderr, " %2.2u %s<%s> off:" HOST_SIZE_T_PRINT_UNSIGNED " "
+    sprintf(ach, " %2.2u %s<%s> off:" HOST_SIZE_T_PRINT_UNSIGNED " "
                     "msiz:%u cap:%u dig:%u rdig:%d attr:0x" HOST_SIZE_T_PRINT_HEX_PURE " loc:%p",
             new_var->level,
             new_var->name,
             cbl_field_type_str(new_var->type),
             (fmt_size_t)new_var->offset,
             new_var->data.memsize,
-            new_var->data.capacity,
+            new_var->data.capacity(),
             new_var->data.digits,
             new_var->data.rdigits,
             (fmt_size_t)new_var->attr,
             static_cast<void*>(new_var));
+    SHOW_PARSE_TEXT(ach)
 
     if( is_table(new_var) )
       {
-      fprintf(stderr," OCCURS:" HOST_SIZE_T_PRINT_DEC,
+      sprintf(ach, " OCCURS:" HOST_SIZE_T_PRINT_DEC,
               (fmt_size_t)new_var->occurs.ntimes());
+      SHOW_PARSE_TEXT(ach)
       }
     const cbl_field_t *parent = parent_of(new_var);
     if( parent )
       {
-      fprintf(stderr,
+      sprintf(ach,
               " parent:(" HOST_SIZE_T_PRINT_DEC ")%s",
               (fmt_size_t)new_var->parent,
               parent->name);
+      SHOW_PARSE_TEXT(ach)
       }
     else
       {
@@ -17373,13 +16636,15 @@ parser_symbol_add(struct cbl_field_t *new_var )
         const symbol_elem_t *e = symbol_at(parent_index);
         if( e->type == SymFile )
           {
-          fprintf(stderr,
+          sprintf(ach,
                   " parent_file:(" HOST_SIZE_T_PRINT_DEC ")%s",
                   (fmt_size_t)new_var->parent,
                   e->elem.file.name);
+          SHOW_PARSE_TEXT(ach)
           if( e->elem.file.attr & external_e )
             {
-            fprintf(stderr, " (flagged external)");
+            sprintf(ach, " (flagged external)");
+            SHOW_PARSE_TEXT(ach)
             }
           }
         }
@@ -17387,18 +16652,44 @@ parser_symbol_add(struct cbl_field_t *new_var )
 
     if( symbol_redefines(new_var) )
       {
-      fprintf(stderr,
+      sprintf(ach,
               " redefines:(%p)%s",
               static_cast<void*>(symbol_redefines(new_var)),
               symbol_redefines(new_var)->name);
+      SHOW_PARSE_TEXT(ach)
       }
 
+    if(    new_var->type == FldGroup
+        || new_var->type == FldAlphanumeric
+        || new_var->type == FldNumericEdited
+        || new_var->type == FldAlphaEdited
+        || new_var->type == FldLiteralA
+        )
+      {
+      if( new_var->data.initial && new_var->data.capacity() )
+        {
+        SHOW_PARSE_INDENT
+        for(size_t i=0; i<new_var->data.capacity(); i++)
+          {
+          fprintf(stderr, "%2.2X ", static_cast<unsigned char>(new_var->data.initial[i]));
+          }
+        }
+      }
+    if( new_var->data.original() && strlen(new_var->data.original()) )
+      {
+      SHOW_PARSE_INDENT
+      sprintf(ach,
+              "\"%s\" (%d)",
+              new_var->data.original(),
+              static_cast<int>(strlen(new_var->data.original())));
+      SHOW_PARSE_TEXT(ach);
+      }
     SHOW_PARSE_END
     }
 
   if( new_var->level == 1  && new_var->occurs.bounds.upper )
     {
-    if( new_var->data.memsize < new_var->data.capacity * new_var->occurs.bounds.upper )
+    if( new_var->data.memsize < new_var->data.capacity() * new_var->occurs.bounds.upper )
       {
       cbl_internal_error("LEVEL 01 (%s) OCCURS "
                          "has insufficient data.memsize", new_var->name);
@@ -17419,9 +16710,9 @@ parser_symbol_add(struct cbl_field_t *new_var )
           {
           gg_free(member(new_var, "data"));
           gg_assign(member(new_var, "data"),
-                    gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity)));
+                    gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity())));
           gg_assign(member(new_var, "allocated"),
-                    build_int_cst_type(SIZE_T, new_var->data.capacity));
+                    build_int_cst_type(SIZE_T, new_var->data.capacity()));
           }
         ELSE
           {
@@ -17455,7 +16746,7 @@ parser_symbol_add(struct cbl_field_t *new_var )
       }
 
     size_t length_of_initial_string = 0;
-    char *new_initial = NULL;
+    const char *new_initial = NULL;
 
     //  Make sure we have a new variable to work with.
     if( !new_var )
@@ -17487,7 +16778,7 @@ parser_symbol_add(struct cbl_field_t *new_var )
       TRACE1_END
       }
 
-    if( is_table(new_var) && new_var->data.capacity == 0)
+    if( is_table(new_var) && new_var->data.capacity() == 0)
       {
       cbl_internal_error(
           "%s: %d %s is a table, but it improperly has a capacity of zero",
@@ -17546,7 +16837,7 @@ parser_symbol_add(struct cbl_field_t *new_var )
                           "%<var_decl_node%>", __func__, new_var->name);
       }
 
-    switch( new_var->type )
+    switch( new_var->type ) // Trap_here for ordinary variables.
       {
       static int counter=1;
       char ach[2*sizeof(cbl_name_t)];
@@ -17566,7 +16857,7 @@ parser_symbol_add(struct cbl_field_t *new_var )
     if(    new_var->type == FldNumericBinary
         || new_var->type == FldNumericBin5 )
       {
-      switch( new_var->data.capacity )
+      switch( new_var->data.capacity() )
         {
         case 1:
         case 2:
@@ -17579,7 +16870,7 @@ parser_symbol_add(struct cbl_field_t *new_var )
                   "%s is type %s and has capacity %u\n",
                   new_var->name,
                   cbl_field_type_str(new_var->type),
-                  new_var->data.capacity);
+                  new_var->data.capacity());
           gcc_unreachable();
           break;
         }
@@ -17587,9 +16878,33 @@ parser_symbol_add(struct cbl_field_t *new_var )
 
     size_t level_88_string_size = 0;
     char *level_88_string = NULL;
+    char *class_string = NULL;
     if( ancestor )
       {
-      level_88_string = get_level_88_domain(ancestor->data.capacity, new_var, level_88_string_size);
+      level_88_string = get_level_88_domain(ancestor->data.capacity(),
+                                            new_var,
+                                            level_88_string_size);
+      if( level_88_string )
+        {
+        // At this point, the string is in source_code encoding, no matter what
+        // the variable's encoding might be.  In the run-time, we will be doing
+        // any comparisons of text strings using UTF32 (because that's how we
+        // handle somebody specifying a UTF-8 exec-charset.)  Rather than
+        // convert this string at run-time, we convert it here:
+        size_t converted_length;
+        const char *converted = __gg__iconverter(
+                                 new_var->codeset.default_encodings.source->type,
+                                 DEFAULT_32_ENCODING,
+                                 level_88_string,
+                                 level_88_string_size,  // Convert the NUL
+                                 &converted_length);
+        level_88_string_size = converted_length;
+        level_88_string = static_cast<char *>(xrealloc(level_88_string,
+                                                       level_88_string_size));
+        memcpy(level_88_string, converted, level_88_string_size);
+        // level_88_string is now a UTF32 string with a terminating four-byte
+        // NUL.
+        }
       }
 
     if( !new_var->data.picture )
@@ -17609,30 +16924,27 @@ parser_symbol_add(struct cbl_field_t *new_var )
 
     if( new_var->type == FldClass && new_var->level != 88 )
       {
-      new_var->data.initial = get_class_condition_string(new_var);
+      class_string = get_class_condition_string(new_var);
+      length_of_initial_string = strlen(class_string)+1;
+      new_initial = class_string;
       }
-
-    if( new_var->type == FldLiteralA )
+    else if( new_var->type == FldLiteralA )
       {
-      length_of_initial_string = new_var->data.capacity;
+      length_of_initial_string = new_var->data.capacity();
       }
-    else if( new_var->data.initial && new_var->data.initial[0] != '\0' )
+    else if( new_var->data.original() && new_var->data.original()[0] != '\0' )
       {
       if( new_var->type == FldClass )
         {
-        length_of_initial_string = strlen(new_var->data.initial)+1;
+        length_of_initial_string = strlen(new_var->data.original())+1;
         }
       else if( new_var->type == FldNumericDisplay )
         {
-        length_of_initial_string = strlen(new_var->data.initial)+1;
+        length_of_initial_string = strlen(new_var->data.original())+1;
         }
       else
         {
-        // This is an ordinary string
-        // fprintf(stderr, ">>>>>>> parser_symbol_add %s %s \n", cbl_field_type_str(new_var->type), new_var->name);
-        // fprintf(stderr, "        %d %d\n", (int)strlen(new_var->data.initial), (int)new_var->data.capacity);
-        //length_of_initial_string = strlen(new_var->data.initial) + 1;
-        length_of_initial_string = new_var->data.capacity + 1;
+        length_of_initial_string = new_var->data.capacity() + 1;
         }
       }
     else
@@ -17650,11 +16962,14 @@ parser_symbol_add(struct cbl_field_t *new_var )
           && new_var->type != FldLiteralN
           && !(new_var->attr & intermediate_e))
       {
-      // During the early stages of implementing cbl_field_t::our_index, there
-      // were execution paths in parse.y and parser.cc that resulted in
-      // our_index not being set.  Those should be gone.
-      fprintf(stderr, "our_index is NULL under unanticipated circumstances");
-      gcc_assert(false);
+      if( ! (new_var->type == FldFloat && new_var->has_attr(constant_e)) )
+        {
+        // N.B. If level is 0 then we're not participating in a hierarchy.
+        // During the early stages of implementing cbl_field_t::our_index, there
+        // were execution paths in parse.y and parser.cc that resulted in
+        // our_index not being set.  Those should be gone.
+        cbl_errx("%<our_index%> is NULL under unanticipated circumstances");
+        }
       }
 
     // When we create the cblc_field_t structure, we need a data pointer
@@ -17727,11 +17042,11 @@ parser_symbol_add(struct cbl_field_t *new_var )
      * As of Tue Apr  4 10:29:35 2023, we support 01 CONSTANT numeric values as follows:
      * 1.  FldNumericBin5
      * 2.  always constant_e, also potentially global_e
-     * 3.  compile-time value in cbl_field_data_t::value
+     * 3.  compile-time value in cbl_field_data_t::valuer
      * 4.  cbl_field_data_t::capacity is 0 because it requires no working storage
      */
 
-    if( new_var->data.capacity == 0
+    if( new_var->data.capacity() == 0
         && new_var->level != 88
         && new_var->type  != FldClass
         && new_var->type  != FldLiteralN
@@ -17748,7 +17063,8 @@ parser_symbol_add(struct cbl_field_t *new_var )
 
     if( level_88_string )
       {
-      new_var->data.initial = level_88_string;
+      new_var->data.original(level_88_string);
+      new_initial = level_88_string;
       length_of_initial_string = level_88_string_size;
       }
 
@@ -17758,7 +17074,7 @@ parser_symbol_add(struct cbl_field_t *new_var )
       {
       char achDataName[256];
       sprintf(achDataName, "__%s_vardata", external_record_base);
-      tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity);
+      tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity());
       new_var->data_decl_node = gg_define_variable(
                           array_type,
                           achDataName,
@@ -17789,9 +17105,9 @@ parser_symbol_add(struct cbl_field_t *new_var )
         {
         // Create a static array of UCHAR, and make that the data_decl_node
         // size_t bytes_to_allocate = new_var->data.memsize ?
-                                // new_var->data.memsize : new_var->data.capacity;
+                                // new_var->data.memsize : new_var->data.capacity();
         size_t bytes_to_allocate = std::max(new_var->data.memsize,
-                                            new_var->data.capacity);
+                                            new_var->data.capacity());
 
         // A FldClass actually doesn't need any bytes, because the only important
         // thing about it is the .initial field.  We will allocate a single byte,
@@ -17876,34 +17192,12 @@ parser_symbol_add(struct cbl_field_t *new_var )
         }
       }
 
-    if( new_var->data.initial )
+    // At this point, new_initial might have been set by
+    // get_class_condition_string.  If not, we set it another way:
+    if( !level_88_string && !class_string)
       {
-      new_initial = initial_from_initial(new_var);
-      }
-    if( new_initial )
-      {
-      switch(new_var->type)
-        {
-        case FldGroup:
-        case FldAlphanumeric:
-        case FldLiteralA:
-          length_of_initial_string = new_var->data.capacity+1;
-          break;
-
-        case FldLiteralN:
-          length_of_initial_string = strlen(new_initial)+1;
-          break;
-
-        default:
-          length_of_initial_string = new_var->data.capacity;
-          break;
-        }
-      }
-    else
-      {
-      new_initial = static_cast<char *>(xmalloc(length_of_initial_string));
-      gcc_assert(new_initial);
-      memcpy(new_initial, new_var->data.initial, length_of_initial_string);
+      new_initial = const_cast<char *>(new_var->data.initial);
+      length_of_initial_string = new_var->data.capacity();
       }
 
     actual_allocate:
@@ -17913,12 +17207,8 @@ parser_symbol_add(struct cbl_field_t *new_var )
                                       new_initial,
                                       immediate_parent,
                                       new_var_decl);
-    free(new_initial);
-
-    if( level_88_string )
-      {
-      free(level_88_string);
-      }
+    free(level_88_string);
+    free(class_string);
 
     if(  !(new_var->attr & ( linkage_e | based_e)) )
       {
index 802bba7594468ac18606ea0b4307d67dae30d4b3..162cb572ed732061126b32f4b15d2f7de79a7242 100644 (file)
@@ -469,6 +469,20 @@ parser_release( cbl_field_t *record_area );
 void
 parser_exception_file( cbl_field_t *tgt, cbl_file_t* file = NULL );
 
+void
+parser_intrinsic_convert(cbl_field_t *tgt,
+                         const cbl_refer_t& input,                           
+                         convert_type_t src_fmt, 
+                         unsigned int   dst_fmt );
+
+void
+parser_intrinsic_find_string(cbl_field_t *tgt,
+                             const cbl_refer_t& haystack,
+                             const cbl_refer_t& needle,
+                             const cbl_refer_t *after,
+                             bool last,
+                             bool anycase);
+
 void
 parser_module_name( cbl_field_t *tgt, module_type_t type );
 
index 7d6ae8c93d48538d5b262c90949b6473bba659f7..535a7740c093cab58824bfd0438017c4d4247711 100644 (file)
@@ -173,13 +173,8 @@ arithmetic_operation(size_t nC, cbl_num_result_t *C,
     temp_field.type = remainder->field->type;
     temp_field.attr = (remainder->field->attr | intermediate_e) & ~initialized_e;
     temp_field.level = 1;
-    temp_field.data.memsize   = remainder->field->data.memsize ;
-    temp_field.data.capacity  = remainder->field->data.capacity;
-    temp_field.data.digits    = remainder->field->data.digits  ;
-    temp_field.data.rdigits   = remainder->field->data.rdigits ;
-    temp_field.data.initial   = remainder->field->data.initial ;
-    temp_field.data.picture   = remainder->field->data.picture ;
-    temp_field.codeset        = remainder->field->codeset ;
+    temp_field.data = remainder->field->data;
+    temp_field.codeset = remainder->field->codeset ;
     parser_symbol_add(&temp_field);
     temp_remainder.field = &temp_field;
 
@@ -373,7 +368,7 @@ largest_binary_term(size_t nA, cbl_refer_t *A)
       {
       // This is an integer type that can be worked with quickly
       is_negative |= ( A[i].field->attr & signable_e );
-      max_capacity = std::max(max_capacity, A[i].field->data.capacity);
+      max_capacity = std::max(max_capacity, A[i].field->data.capacity());
       retval = tree_type_from_size(max_capacity, is_negative);
       }
     else
@@ -425,7 +420,7 @@ fast_add( size_t nC, cbl_num_result_t *C,
       // We now either accumulate into C[n] or assign to C[n]:
       for(size_t i=0; i<nC; i++ )
         {
-        tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0);
+        tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity(), 0);
         tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"),
                                 refer_offset(C[i].refer));
         tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
@@ -511,7 +506,7 @@ fast_subtract(size_t nC, cbl_num_result_t *C,
       // We now either accumulate into C[n] or assign to C[n]:
       for(size_t i=0; i<nC; i++ )
         {
-        tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0);
+        tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity(), 0);
         tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"),
                                 refer_offset(C[i].refer));
         tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
@@ -583,7 +578,7 @@ fast_multiply(size_t nC, cbl_num_result_t *C,
       // We now either multiply into C[n] or assign A * B to C[n]:
       for(size_t i=0; i<nC; i++ )
         {
-        tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0);
+        tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity(), 0);
         tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"),
                                 refer_offset(C[i].refer));
         tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
@@ -663,7 +658,7 @@ fast_divide(size_t nC, cbl_num_result_t *C,
       for(size_t i=0; i<nC; i++ )
         {
         tree dest_type =
-                       tree_type_from_size(C[i].refer.field->data.capacity, 0);
+                       tree_type_from_size(C[i].refer.field->data.capacity(), 0);
         tree dest_addr = gg_add(member( C[i].refer.field->var_decl_node,
                                         "data"),
                                 refer_offset(C[i].refer));
@@ -690,7 +685,7 @@ fast_divide(size_t nC, cbl_num_result_t *C,
           {
           dest_addr = gg_add( member(remainder.field->var_decl_node, "data"),
                               refer_offset(remainder));
-          dest_type = tree_type_from_size(remainder.field->data.capacity, 0);
+          dest_type = tree_type_from_size(remainder.field->data.capacity(), 0);
           ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
 
           gg_assign(gg_indirect(ptr),
@@ -1536,7 +1531,7 @@ parser_subtract(size_t nC, cbl_num_result_t *C, // C = B - A
                                   error,
                                   not_error,
                                   compute_error,
-                                  "__gg__fixed_phase2_assign_to_c");
+                                  "__gg__float_phase2_assign_to_c");
             }
           arithmetic_error_handler( error,
                                     not_error,
index 63f37f68806b6f600f4889200f3f65e82c7fb25b..b384cd14939b8936f7a895d025d9d4ea10b7762b 100644 (file)
@@ -295,7 +295,7 @@ get_any_capacity(cbl_field_t *field)
     }
   else
     {
-    return build_int_cst_type(SIZE_T, field->data.capacity);
+    return build_int_cst_type(SIZE_T, field->data.capacity());
     }
   }
 
@@ -316,6 +316,11 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
   const cbl_enabled_exceptions_t&
                                 enabled_exceptions( cdf_enabled_exceptions() );
 
+  // These calculations are based on position within the field, so offset and
+  // length have to be multiplied by the stride of the encoding:
+  const charmap_t *charmap = __gg__get_charmap(refer.field->codeset.encoding);
+  tree stride = build_int_cst_type(LONG, charmap->stride());
+
   if( !enabled_exceptions.match(ec_bound_ref_mod_e) )
     {
     // This is normal operation -- no exception checking.  Thus, we won't
@@ -327,6 +332,7 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
                       refer.refmod.from->field,
                       refer_offset(*refer.refmod.from));
     gg_decrement(refstart);
+    gg_assign(refstart, gg_multiply(refstart, stride));
 
     if( refer.refmod.len )
       {
@@ -334,12 +340,16 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
       get_integer_value(reflen,
                         refer.refmod.len->field,
                         refer_offset(*refer.refmod.len));
+      // Modify refer.length by stride:
+      gg_assign(reflen, gg_multiply(reflen, stride));
       }
     else
       {
       // The length was not specified, so we need to return the distance
       // between refmod.from and the end of the field:
-      gg_assign(reflen, gg_subtract( get_any_capacity(refer.field), refstart) );
+      gg_assign(reflen,
+                gg_subtract( get_any_capacity(refer.field),
+                                     refstart) );
       }
     return;
     }
@@ -366,6 +376,7 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
 
   // Make refstart zero-based:
   gg_decrement(refstart);
+  gg_assign(refstart, gg_multiply(refstart, stride));
 
   IF( refstart, lt_op, build_int_cst_type(LONG, 0 ) )
     {
@@ -374,16 +385,18 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
     gg_assign(refstart, gg_cast(LONG, integer_zero_node));
     // Set reflen to one here, because otherwise it won't be established.
     gg_assign(reflen, gg_cast(TREE_TYPE(reflen), integer_one_node));
+    gg_assign(reflen, gg_multiply(reflen, stride));
     }
   ELSE
     {
     IF( refstart, gt_op, gg_cast(TREE_TYPE(refstart), get_any_capacity(refer.field)) )
       {
-      // refstart greater than zero is an error condition:
+      // refstart greater than capacity is an error condition:
       set_exception_code(ec_bound_ref_mod_e);
       gg_assign(refstart, gg_cast(LONG, integer_zero_node));
       // Set reflen to one here, because otherwise it won't be established.
       gg_assign(reflen, gg_cast(TREE_TYPE(reflen), integer_one_node));
+      gg_assign(reflen, gg_multiply(reflen, stride));
       }
     ELSE
       {
@@ -393,6 +406,7 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
                           refer.refmod.len->field,
                           refer_offset(*refer.refmod.len),
                           CHECK_FOR_FRACTIONAL_DIGITS);
+        gg_assign(reflen, gg_multiply(reflen, stride));
         IF( var_decl_rdigits,
             ne_op,
             integer_zero_node )
@@ -400,6 +414,7 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
           // length is not an integer, which is an error condition
           set_exception_code(ec_bound_ref_mod_e);
           gg_assign(reflen, gg_cast(LONG, integer_one_node));
+          gg_assign(reflen, gg_multiply(reflen, stride));
           gg_assign(var_decl_rdigits, integer_zero_node);
           }
         ELSE
@@ -410,6 +425,7 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
             // length is too small, which is an error condition.
             set_exception_code(ec_bound_ref_mod_e);
             gg_assign(reflen, gg_cast(LONG, integer_one_node));
+            gg_assign(reflen, gg_multiply(reflen, stride));
             }
           ELSE
             {
@@ -431,6 +447,7 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
               // as the TODO item.
               gg_assign(refstart, gg_cast(LONG, integer_zero_node));
               gg_assign(reflen, gg_cast(LONG, integer_one_node));
+              gg_assign(reflen, gg_multiply(reflen, stride));
               }
             ELSE
               {
@@ -442,6 +459,8 @@ get_and_check_refstart_and_reflen(  tree         refstart,// LONG returned value
             ENDIF
           }
           ENDIF
+        // Modify the length calculation for stride:
+        //gg_assign(reflen, gg_multiply(reflen, stride));
         }
       else
         {
@@ -578,7 +597,7 @@ get_data_offset(const cbl_refer_t &refer,
 
     // We need to start with the rightmost subscript, and work our way up through
     // our parents.  As we find each parent with an OCCURS, we increment qual_data
-    // by (subscript-1)*An->data.capacity
+    // by (subscript-1)*An->data.capacity()
 
     // Establish the field_t pointer for walking up through our ancestors:
     cbl_field_t *parent = refer.field;
@@ -788,6 +807,9 @@ get_binary_value( tree value,
     case FldNumericDisplay:
       {
       Analyzer.Message("FldNumericDisplay");
+      const charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
+      int stride = charmap->stride();
+
       // Establish the source
       tree source_address = get_data_address(field, field_offset);
 
@@ -859,8 +881,8 @@ get_binary_value( tree value,
                 // The final byte is '+' or '-'
                 gg_assign(signp,
                           gg_add(source_address,
-                                build_int_cst_type( SIZE_T,
-                                                    field->data.digits)));
+                                build_int_cst_type(SIZE_T,
+                                                  field->data.digits*stride)));
                 }
               }
             else
@@ -877,7 +899,7 @@ get_binary_value( tree value,
                 gg_assign(signp,
                           gg_add(source_address,
                                 build_int_cst_type( SIZE_T,
-                                                    field->data.digits-1)));
+                                              (field->data.digits-1)*stride)));
                 }
               }
             }
@@ -913,7 +935,7 @@ get_binary_value( tree value,
       tree source = get_data_address(field, field_offset);
 
       size_t dest_nbytes   = gg_sizeof(value);
-      size_t source_nbytes = field->data.capacity;
+      size_t source_nbytes = field->data.capacity();
 
       if( debugging )
         {
@@ -1014,7 +1036,7 @@ get_binary_value( tree value,
         }
       tree source_address = get_data_address(field, field_offset);
       tree dest_type = TREE_TYPE(value);
-      tree source_type = tree_type_from_size( field->data.capacity,
+      tree source_type = tree_type_from_size( field->data.capacity(),
                                               field->attr & signable_e);
       if( debugging && rdigits)
         {
@@ -1045,7 +1067,7 @@ get_binary_value( tree value,
                                     get_data_address( field,
                                                       field_offset),
                                     build_int_cst_type(INT,
-                                                      field->data.capacity),
+                                                      field->data.capacity()),
                                     NULL_TREE)));
       break;
       }
@@ -1096,7 +1118,7 @@ static tree
 tree_type_from_field(const cbl_field_t *field)
   {
   gcc_assert(field);
-  return tree_type_from_size(field->data.capacity, field->attr & signable_e);
+  return tree_type_from_size(field->data.capacity(), field->attr & signable_e);
   }
 
 tree
@@ -1566,7 +1588,7 @@ copy_little_endian_into_place(cbl_field_t *dest,
     }
   scale_by_power_of_ten_N(value, dest->data.rdigits - rhs_rdigits);
 
-  tree dest_type = tree_type_from_size( dest->data.capacity,
+  tree dest_type = tree_type_from_size( dest->data.capacity(),
                                         dest->attr & signable_e);
   tree dest_pointer = gg_add(member(dest->var_decl_node, "data"),
                              dest_offset);
@@ -1740,23 +1762,23 @@ char *
 get_literal_string(cbl_field_t *field)
   {
   assert(field->type == FldLiteralA);
-  size_t buffer_length = field->data.capacity+1;
+  size_t buffer_length = field->data.capacity()+1;
   char *buffer = static_cast<char *>(xcalloc(1, buffer_length));
 
   size_t charsout;
   const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
                                      field->codeset.encoding,
-                                     field->data.initial,
-                                     field->data.capacity,
+                                     field->data.original(),
+                                     field->data.capacity(),
                                      &charsout);
-  memcpy(buffer, converted, field->data.capacity+1);
+  memcpy(buffer, converted, field->data.capacity()+1);
   return buffer;
   }
 
 bool
 refer_is_clean(const cbl_refer_t &refer)
   {
-  if( !refer.field || refer.field->type == FldLiteralN )
+    if( !refer.field || refer.field->is_numeric_constant() )
     {
     // It is routine for a refer to have no field.  It happens when the parser
     // passes us a refer for an optional parameter that has been omitted, for
@@ -1766,7 +1788,7 @@ refer_is_clean(const cbl_refer_t &refer)
     // like.
     return true;
     }
-
+    
   return     !refer.all
           && !refer.addr_of
           && !refer.nsubscript()
@@ -1818,7 +1840,8 @@ refer_fill_depends(const cbl_refer_t &refer)
   // We multiply the ODO value by the size of the data capacity to get the
   // shortened length:
 
-  tree mult_expr = gg_multiply( build_int_cst_type(TREE_TYPE(value64), odo->data.capacity),
+  tree mult_expr = gg_multiply( build_int_cst_type(TREE_TYPE(value64),
+                                                   odo->data.capacity()),
                                 value64 );
 
   // And we add that to the distance from the requested variable to the odo
@@ -1918,8 +1941,6 @@ refer_size_source(const cbl_refer_t &refer)
 
       */
 
-  // This test has to be here, otherwise there are failures in regression
-  // testing.
   if( !refer.field )
     {
     return size_t_zero_node;
index 0032b631ac7ce1a67d2560ef7e18227bdb53adf9..cd27cdf68cd49d83e1146163e037b7a63d7c4e72 100644 (file)
         "%{D*} %{E} %{I*} %{M} %{fmax-errors*} %{fsyntax-only*} "
         "%{fcobol-exceptions*} "
         "%{copyext} "
+        "%{fexec-charset*} "
+        "%{fexec-national-charset*} "
         "%{fstatic-call} %{fdefaultbyte} "
         "%{ffixed-form} %{ffree-form} %{indicator-column*} "
         "%{preprocess} "
         "%{dialect} "
         "%{include} "
        "%{Wno-apply-commit} "
+       "%{Wno-any-length} "
        "%{Wno-file-code-set} "
        "%{Wno-high-order-bit} "
        "%{Wno-bad-line-directive} "
        "%{Wno-bad-numeric} "
        "%{Wno-binary-long-long} "
        "%{Wno-call-giving} "
+       "%{Wno-call-literal} "
        "%{Wno-cdf-dollar} "
        "%{Wno-cdf-invalid-parameter} "
        "%{Wno-cdf-name-not-found} "
@@ -71,6 +75,7 @@
        "%{Wno-literal-concat} "
        "%{Wno-locale-error} "
        "%{Wno-move-corresponding} "
+       "%{Wno-move-index} "
        "%{Wno-move-pointer} "
        "%{Wno-nllanginfo-error} "
        "%{Wno-operator-space} "
index 906ee9afc2f401cadbd473a42576b754f186be9d..e04ba86dad99c8a6150d2a5494b2a73218864cc5 100644 (file)
@@ -73,6 +73,14 @@ copyext
 Cobol Joined Separate Var(cobol_copyext) Init(0)
 Define alternative implicit copybook filename extension.
 
+fexec-charset=
+Cobol Joined Var(cobol_charset) RejectNegative
+; Documented in c.opt
+
+fexec-national-charset=
+Cobol Joined Var(cobol_national_charset) RejectNegative
+Set the default execution character set for NATIONAL data items
+
 ;; warnings
 
 ; Par78CdfDefinedW
@@ -90,6 +98,11 @@ Wcall-giving
 Cobol Warning Var(call_giving, 1) Init(1)
 Warn if CALL ... GIVING is used.
 
+; MfCallLiteral
+Wcall-literal
+Cobol Warning Var(call_literal, 1) Init(1)
+Warn if CALL is used is used with a literal parameter by reference.
+
 ; MfCdfDollar
 Wcdf-dollar
 Cobol Warning Var(cdf_dollar, 1) Init(1)
@@ -120,6 +133,16 @@ Wlevel-78
 Cobol Warning Var(level_78, 1) Init(1)
 Warn if Level 78 is used.
 
+; MfAnyLengthContained
+Wany-length
+Cobol Warning Var(cobol_any_length, 1) Init(1)
+Warn if ANY LENGTH is used in outermost program.
+
+; MfMoveIndex
+Wmove-index
+Cobol Warning Var(move_index, 1) Init(1)
+Warn if MOVE INDEX is used
+
 ; MfMovePointer
 Wmove-pointer
 Cobol Warning Var(move_pointer, 1) Init(1)
@@ -336,10 +359,6 @@ findicator-column
 Cobol RejectNegative Joined Separate UInteger Var(indicator_column) Init(0) IntegerRange(0, 8)
 -findicator-column=<n> Column after which Region A begins.
 
-finternal-ebcdic
-Cobol Var(cobol_ebcdic, 1) Init(0)
--finternal-ebcdic      Internal processing is in EBCDIC Code Page 1140.
-
 fstatic-call
 Cobol Var(cobol_static_call, 1) Init(1)
 Enable/disable static linkage for CALL literals.
index e103303227aeeb6359474e2bb31ee6280e566528..3911dd0fc3e0ba77921fc70e40fcbe2d045c4d3e 100644 (file)
@@ -991,9 +991,9 @@ parse_copy_directive( filespan_t& mfile ) {
       if( yy_flex_debug ) {
         size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p);
         size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend);
-        dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+        dbgmsg("%s:%d: %s:" HOST_SIZE_T_PRINT_UNSIGNED
                ": COPY directive is " HOST_SIZE_T_PRINT_UNSIGNED " lines '%.*s'",
-               __func__, __LINE__,
+               __func__, __LINE__, cobol_filename(), 
                (fmt_size_t)nnl, (fmt_size_t)nst, copy_stmt.size(), copy_stmt.p);
       }
     }
index 423b53a8c34d53e262acba10711b1d1fa3ba7e2e..3f9b7735bf01420c9356520e1fb94aead7354108 100644 (file)
@@ -138,11 +138,14 @@ std::set<cbl_diag_t> cbl_diagnostics {
 
   { MfBinaryLongLong, "-Wbinary-long-long", diagnostics::kind::error, dialect_mf_gnu },
   { MfCallGiving, "-Wcall-giving", diagnostics::kind::error, dialect_mf_gnu },
+  { MfCallLiteral, "-Wcall-literal", diagnostics::kind::error, dialect_mf_e },
   { MfCdfDollar, "-Wcdf-dollar", diagnostics::kind::error, dialect_mf_gnu },
   { MfComp6, "-Wcomp-6", diagnostics::kind::error, dialect_mf_gnu },
   { MfCompX, "-Wcomp-x", diagnostics::kind::error, dialect_mf_gnu },
-  { MfLevel_1_Occurs, "Wlevel-1-occurs", diagnostics::kind::error, dialect_mf_gnu },
+  { MfLevel_1_Occurs, "-Wlevel-1-occurs", diagnostics::kind::error, dialect_mf_gnu },
   { MfLevel78, "-Wlevel-78", diagnostics::kind::error, dialect_mf_gnu },
+  { MfAnyLength, "-Wany-length", diagnostics::kind::error, dialect_mf_gnu },
+  { MfMoveIndex, "-Wmove-index", diagnostics::kind::error, dialect_gnu_e },
   { MfMovePointer, "-Wmove-pointer", diagnostics::kind::error, dialect_mf_gnu },
   { MfReturningNum, "-Wreturning-number", diagnostics::kind::error, dialect_mf_gnu },
   { MfUsageTypename, "-Wusage-typename", diagnostics::kind::error, dialect_mf_gnu },
@@ -221,17 +224,6 @@ cbl_diagnostic_kind( cbl_diag_id_t id, diagnostics::kind kind ) {
   return false;
 }
 
-bool
-cbl_diagnostic_kind( cbl_dialect_t dialect, diagnostics::kind kind ) {
-  bool ok = true;
-  for( auto diag : cbl_diagnostics ) {
-    if( diag.dialect == dialect ) {
-      if( ! cbl_diagnostic_kind(diag.id, kind) ) ok = false;
-    }
-  }
-  return ok;
-}
-
 void
 cobol_warning( cbl_diag_id_t id, int yn, bool warning_as_error ) {
   gcc_assert( 0 <= yn && yn <= 1 );
@@ -246,6 +238,33 @@ cobol_warning( cbl_diag_id_t id, int yn, bool warning_as_error ) {
   cbl_diagnostic_kind(id, kind);
 }
 
+/*
+ * Set diagnostics associated with a dialog to be ignored, because the
+ * constructs are valid for that dialog.  We cannot use cbl_diagnostic_kind()
+ * for this purpose because it modified the std::set that we're iterating over.
+ */
+void
+cobol_warning_suppress( cbl_dialect_t dialect ) {
+  std::set<cbl_diag_t> modified;
+
+  for( auto diag : cbl_diagnostics ) {
+    if( diag.dialect & dialect ) {
+      switch(diag.id) {
+      case IbmSectionNegE:
+      case IbmSectionRangeE:
+      case IbmSectionSegmentW:
+        break; // do not suppress
+      default:
+        diag.kind = diagnostics::kind::ignored;
+        break;
+      }
+    }
+    modified.insert(diag);
+  }
+  cbl_diagnostics.clear();
+  cbl_diagnostics.insert(modified.begin(), modified.end());
+}
+
 static inline const char *
 option_of( cbl_diag_id_t id ) {
   auto diag = cbl_diagnostics.find(cbl_diag_t(id));
index ad292b9b03ab83e15a290547d720783428b12d2e..bcd8d6f3105036e9724c271e935d2c67fc4d19a5 100644 (file)
@@ -132,6 +132,23 @@ class locale_tgt_t {
       }
       return true;
     }
+    cbl_encoding_t encode_as() const {
+      switch(prefix[0]) {
+      case '\0':
+      case 'X': 
+      case 'Z': 
+        return current_encoding('A');
+      case 'N': 
+        return current_encoding('N');
+      default:
+        dbgmsg("no such prefix '%s'", prefix);
+        if( prefix[0] != ftoupper(prefix[0]) ) {
+          gcc_unreachable();
+        }
+        break;
+      }
+      gcc_unreachable();
+    }
   };
 
   struct acrc_t { // Abbreviated combined relation condition
@@ -238,7 +255,7 @@ class locale_tgt_t {
 
   struct cbl_field_t;
   static inline cbl_field_t *
-  new_literal( const char initial[], enum radix_t radix );
+  new_literal( const cbl_loc_t loc, const char initial[], enum radix_t radix );
 #pragma GCC diagnostic pop
 
   enum select_clause_t {
@@ -716,7 +733,7 @@ class locale_tgt_t {
 
 %type   <field_data>    value78
 %type   <field>         literal name nume typename
-%type   <field>         num_literal signed_literal
+%type   <field>         num_constant num_literal signed_literal
 
 %type  <number>        perform_start
 %type   <refer>         perform_times
@@ -750,7 +767,7 @@ class locale_tgt_t {
 %type   <refer>         advancing  advance_by
 %type   <refer>         alphaval alpha_val numeref scalar scalar88
 %type   <refer>         tableref tableish
-%type   <refer>         varg varg1 varg1a
+%type   <refer>         varg varg1 varg1a start_after start_pos
 %type   <refer>         expr expr_term compute_expr free_tgt by_value_arg
 %type   <refer>         move_tgt selected_name read_key read_into vary_by
 %type   <refer>         accept_refer num_operand envar search_expr any_arg
@@ -791,7 +808,7 @@ class locale_tgt_t {
 %type   <field>         intrinsic0
 %type   <number>        intrinsic_v intrinsic_I intrinsic_N intrinsic_X
 %type   <number>        intrinsic_I2 intrinsic_N2 intrinsic_X2
-%type   <number>        lopper_case
+%type   <number>        lopper_case 
 %type   <number>        return_body return_file
 %type   <field>         trim_trailing function_udf
 
@@ -835,16 +852,18 @@ class locale_tgt_t {
 %type   <boolean>       io_invalid  read_eof  write_eop
                         global is_global anycase backward
                         end_display
-                        exh_changed exh_named
+                        exh_changed exh_named last
                         override
 %type   <number>        mistake globally first_last
-%type   <io_mode>   io_mode
+%type   <io_mode>       io_mode
 
 %type   <label_pair>    xmlprocs
 %type   <error>         xmlexcept xmlexcepts
 %type   <field>         xmlencoding xmlvalidating
+%type   <field>         xmlgen_count
 %type   <number>        xmlreturning
 %type   <label>         xmlparse_body
+%type   <xml_decl_attr> xmlgen_decl
 
 %type   <labels>        labels
 %type   <label>         label_1 section_name
@@ -863,7 +882,7 @@ class locale_tgt_t {
 %type   <replacement>   init_by
 %type   <replacements>  init_bys init_replace
 %type   <refer>         init_data exit_with stop_status
-%type   <float128>      cce_expr cce_factor const_value
+%type   <cce_type>      cce_expr cce_factor const_value
 %type   <prog_end>      end_program1
 %type   <substitution>  subst_input
 %type   <substitutions> subst_inputs
@@ -888,12 +907,13 @@ class locale_tgt_t {
 %type   <namelocs>      repo_func_names
 %type   <codeset>       codeset_name
 %type   <locale_phrase> locale_phrase
+%type   <number>        convert_hex convert_nat convert_alpha // convert_fmt
 
 %union {
     bool boolean;
     int number;
     char *string;
-    REAL_VALUE_TYPE float128;
+    struct { REAL_VALUE_TYPE r; char *s; } cce_type;
     literal_t literal;
     cbl_field_attr_t field_attr;
     ec_type_t ec_type;
@@ -902,7 +922,7 @@ class locale_tgt_t {
     cbl_namelocs_t *namelocs;
            declarative_list_t* dcl_list_t;
            isym_list_t* isym_list;
-    struct { radix_t radix; char *string; } numstr;
+    struct { bool is_float; radix_t radix; char *string; } numstr;
     struct { YYLTYPE loc; int token; literal_t name; } prog_end;
     struct { int token; special_name_t id; } special_type;
     struct { char locale_type; const char * name; } locale_phrase;
@@ -979,7 +999,7 @@ class locale_tgt_t {
     struct { enum select_clause_t clause; cbl_file_t *file; } select_clause;
     struct { size_t clauses; cbl_file_t *file; } select_clauses;
     struct { YYLTYPE loc; char *on, *off; } switches;
-    struct cbl_domain_t *false_domain;
+    struct { cbl_encoding_t encoding; cbl_domain_t *domain; } false_domain;
     struct { size_t also; unsigned char *low, *high; } colseq;
     struct { cbl_field_attr_t attr; int nbyte; } pic_part;
 
@@ -993,6 +1013,8 @@ class locale_tgt_t {
     struct { bool is_locale; cbl_refer_t *arg2; } numval_locale_t;
            locale_tgt_t *token_list;
 
+    struct xml_decl_attr_t { bool with_decl, with_attr; } xml_decl_attr;
+
     cbl_options_t::arith_t opt_arith;
     cbl_round_t opt_round;
     cbl_section_type_t opt_init_sect;
@@ -1000,16 +1022,25 @@ class locale_tgt_t {
     module_type_t module_type;
 }
 
+%printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret
 %printer { fprintf(yyo, "clauses: 0x%04x", $$); } data_clauses
-%printer { fprintf(yyo, "%s %s %s",
+                        
+%printer { fprintf(yyo, "%s{%u/%u} %s '%s' (%s)",
                         refer_type_str($$),
+                        $$ && $$->field? $$->field->char_capacity() : 0,
+                        $$ && $$->field? $$->field->data.capacity() : 0, 
                         $$? $$->name() : "<none>",
-                        $$ && $$->field? $$->field->codeset.name() : "<none>"); } <refer>
-%printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret
-%printer { fprintf(yyo, "%s %s '%s' (%s)",
+                        $$ && $$->field? $$->field->data.original()?
+                                         $$->field->data.original() : "<nil>" : "",
+                        $$ && $$->field? $$->field->value_str() : "" ); } <refer>
+
+%printer { fprintf(yyo, "%s{%u/%u} %s '%s' (%s)",
                         $$? cbl_field_type_str($$->type) : "<%empty>",
+                        $$? $$->char_capacity() : 0,
+                        $$? $$->data.capacity() : 0, 
                         $$? name_of($$) : "",
-                        $$? $$->data.initial? $$->data.initial : "<nil>" : "",
+                        $$? $$->data.original()?
+                            $$->data.original() : "<nil>" : "",
                         $$? $$->value_str() : "" ); } <field>
 
 %printer { fprintf(yyo, "%c %s",
@@ -1021,7 +1052,7 @@ class locale_tgt_t {
 %printer { fprintf(yyo, "%s'%.*s'{" HOST_SIZE_T_PRINT_UNSIGNED "} %s",
                         $$.prefix, int($$.len), $$.data, (fmt_size_t)$$.len,
                         $$.symbol_name()); } <literal>
-%printer { fprintf(yyo,"%s (1st of" HOST_SIZE_T_PRINT_UNSIGNED")",
+%printer { fprintf(yyo,"%s (1st of " HOST_SIZE_T_PRINT_UNSIGNED")",
                         $$->targets.empty()? "" : $$->targets.front().refer.field->name,
                         (fmt_size_t)$$->targets.size() ); } <targets>
 %printer { fprintf(yyo, "#" HOST_SIZE_T_PRINT_UNSIGNED ": %s",
@@ -1034,8 +1065,8 @@ class locale_tgt_t {
                         teed_up_names().front(), (fmt_size_t) teed_up_names().size() ); } qname
 %printer { fprintf(yyo, "{%d}", $$ ); } <number>
 %printer { fprintf(yyo, "'%s'", $$.string ); } <numstr>
-%printer { const char *s = string_of($$);
-           fprintf(yyo, "{%s}", s? s : "??" ); } <float128>
+%printer { const char *s = string_of($$.r);
+           fprintf(yyo, "{%s}", s? s : "??" ); } <cce_type>
 %printer { fprintf(yyo, "{%s %c%u}", cbl_field_type_str($$.type),
                                      $$.signable? '+' : ' ',
                                      $$.capacity ); } <computational>
@@ -1476,12 +1507,16 @@ class locale_tgt_t {
       return xstrdup(output);
   }
 
-  static inline char * string_of( tree cce ) {
-      return string_of (TREE_REAL_CST (cce));
+  static inline const char * string_of( tree cce ) {
+    tree_node *node = TREE_TYPE(cce);
+    if( INTEGRAL_TYPE_P(node) ) {
+                        return "integer";
+    } 
+    return string_of (TREE_REAL_CST (cce));
   }
 
   cbl_field_t *
-  new_literal( const literal_t& lit, enum cbl_field_attr_t attr );
+  new_literal( const cbl_loc_t loc, const literal_t& lit, enum cbl_field_attr_t attr );
 
   static YYLTYPE first_line_of( YYLTYPE loc );
 %}
@@ -1781,10 +1816,10 @@ opt_init_value: BINARY ZERO { $$ = constant_index(NULLS); }
                  if( $1.len != 1 ) {
                    error_msg(@1, "1-byte hexadecimal literal required");
                  }
-      char ach[16];
-      sprintf(ach, "%d", (int)($1.data[0]));
+                  char ach[16];
+                  sprintf(ach, "%d", (int)($1.data[0]));
                  //auto f = new_literal($1.data);
-                 auto f = new_literal(ach);
+                 auto f = new_literal(@1, ach);
                  f = field_add(@1, f);
                  $$ = field_index(f);
                }
@@ -1801,9 +1836,11 @@ namestr:        ctx_name {
                 }
         |       LITERAL {
                   if( $$.prefix[0] != '\0' ) {
-                    error_msg(@1, "literal cannot use %s prefix in this context",
-                              $$.prefix);
-                    YYERROR;
+                    if( $$.prefix[0] != 'N' ) {
+                      error_msg(@1, "literal cannot use %s prefix in this context",
+                                $$.prefix);
+                      YYERROR;
+                    }
                   }
                  if( !is_cobol_charset($$.data) ) {
                    error_msg(@1, "literal '%s' must be a COBOL or C identifier",
@@ -2662,15 +2699,30 @@ special_name:   dev_mnemonic
                   struct cbl_field_t field = { FldClass, 0, {}, 0, $NAME };
                   if( !namcpy(@NAME, field.name, $2) ) YYERROR;
 
+                  assert( ! domains.empty() );
+                  auto encoding = domains.front().encoding;
+
                   struct cbl_domain_t *domain =
                     new cbl_domain_t[ domains.size() + 1 ] ;
-
+                  auto p = std::find_if( domains.begin(), domains.end(),
+                                         [enc = encoding]
+                                         ( const auto& dom ) {
+                                           return ! dom.encoding_ok(enc);
+                                         } );
+                  if( p != domains.end() ) {
+                    error_msg( @domains, "%qs has encoding %qs "
+                               "but value %qs has encoding %qs",
+                               $NAME, 
+                               current_t::cbl_encoding_str(domains.front().encoding),
+                               p->first.name(), 
+                               current_t::cbl_encoding_str(p->encoding) );
+                  }
                   std::copy(domains.begin(), domains.end(), domain);
+                  domains.clear();
 
-                  field.data.false_value_as($domains);
+                  field.data.false_value_as($domains.domain);
                   field.data.domain_as(domain);
-                  field.codeset.set();
-                  domains.clear();
+                  field.codeset.set(encoding);
 
                   if( field_add(@2, &field) == NULL ) {
                     dbgmsg("failed class");
@@ -3018,72 +3070,83 @@ picture_sym:    %empty                { $$ = NULL; }
                  * "CLASS NAME is domains".
                  */
 domains:        domain
-        |       domains domain { $$ = $1? $1 : $2; }
+        |       domains domain { $$ = $1.domain? $1 : $2; }
                 ;
 
 domain:         all LITERAL[a]
                 {
-                  $$ = NULL;
-                  cbl_domain_t domain(@a, $all, $a.len, $a.data);
-                  domains.push_back(domain);
+                  $$.domain = nullptr;
+                  cbl_domain_t domain($all, $a.len, $a.data);
+                  domains.push_back( domain_t($a.encode_as(), domain) );
                 }
         |       all[a_all] LITERAL[a] THRU all[z_all] LITERAL[z]
                 {
-                  $$ = NULL;
-                  cbl_domain_elem_t first(@a, $a_all, $a.len, $a.data),
-                                     last(@z, $z_all, $z.len, $z.data);
-                  domains.push_back(cbl_domain_t(first, last));
+                  $$.domain = nullptr;
+                  cbl_domain_elem_t first($a_all, $a.len, $a.data),
+                                     last($z_all, $z.len, $z.data);
+                  if( $a.encode_as() == $z.encode_as() ) {
+                    domains.push_back( domain_t($a.encode_as(),
+                                                cbl_domain_t(first, last)) );
+                  } else {
+                    error_msg(@z, "encooding of %qs differs from that of %qs",
+                              $a.data, $z.data);
+                  }
                 }
         |       all NUMSTR[n]
                 {
-                  $$ = NULL;
-                  cbl_domain_t dom(@n, $all, strlen($n.string), $n.string, true);
-                  domains.push_back(dom);
+                  $$.domain = nullptr;
+                  cbl_domain_t dom($all, strlen($n.string), $n.string, true);
+                  domains.push_back( domain_t(dom) );
                 }
         |       all[n_all] NUMSTR[n] THRU all[m_all] NUMSTR[m]
                 {
-                  $$ = NULL;
-                  cbl_domain_elem_t first(@n, $n_all, strlen($n.string), $n.string, true),
-                                    last(@m, $m_all, strlen($m.string), $m.string, true);
-                  domains.push_back(cbl_domain_t(first, last));
+                  $$.domain = nullptr;
+                  cbl_domain_elem_t first($n_all, strlen($n.string), $n.string, true),
+                                    last($m_all, strlen($m.string), $m.string, true);
+                  domains.push_back( domain_t(cbl_domain_t(first, last)) );
                 }
         |       all reserved_value {
-                  $$ = NULL;
+                  $$.domain = nullptr;
                   if( $2 == NULLS ) YYERROR;
                   auto value = constant_of(constant_index($2))->data.initial;
-                  struct cbl_domain_t domain( @2, $all, strlen(value), value );
-                  domains.push_back(domain);
+                  struct cbl_domain_t domain( $all, strlen(value), value );
+                  domains.push_back(domain_t(domain));
                 }
         |       all[a_all] reserved_value[a] THRU all[z_all] LITERAL[z] {
-                  $$ = NULL;
+                  $$.domain = nullptr;
                   if( $a == NULLS ) YYERROR;
                   auto value = constant_of(constant_index($a))->data.initial;
-                  cbl_domain_elem_t first(@a, $a_all, strlen(value), value),
-                                     last(@z, $z_all, $z.len, $z.data);
-                  domains.push_back(cbl_domain_t(first, last));
+                  cbl_domain_elem_t first($a_all, strlen(value), value),
+                                     last($z_all, $z.len, $z.data);
+                  domains.push_back( domain_t($z.encode_as(),
+                                              cbl_domain_t(first, last)) );
                 }
         |       all[a_all] reserved_value[a] THRU all[z_all] NUMSTR[z] {
-                  $$ = NULL;
+                  $$.domain = nullptr;
                   if( $a == NULLS ) YYERROR;
                   auto value = constant_of(constant_index($a))->data.initial;
-                  cbl_domain_elem_t first(@a, $a_all, strlen(value), value, true),
-                                     last(@z, $z_all, strlen($z.string), $z.string, true);
-                  domains.push_back(cbl_domain_t(first, last));
+                  cbl_domain_elem_t first($a_all, strlen(value), value, true),
+                                     last($z_all, strlen($z.string), $z.string, true);
+                  domains.push_back( domain_t(cbl_domain_t(first, last)) );
                 }
         |       when_set_to FALSE_kw is LITERAL[value]
                 {
                   const char *dom = $value.data;
-                  $$ = new cbl_domain_t(@value, false, $value.len, dom);
+                  $$.domain = new cbl_domain_t(false, $value.len, dom);
+                  $$.encoding = $value.encode_as();
                 }
         |       when_set_to FALSE_kw is reserved_value
                 {
                   if( $4 == NULLS ) YYERROR;
                   auto value = constant_of(constant_index($4))->data.initial;
-                  $$ = new cbl_domain_t(@4, false, strlen(value), value );
+                  $$.domain = new cbl_domain_t(false, strlen(value), value );
+                  $$.encoding = no_encoding_e;
                 }
         |       when_set_to FALSE_kw is NUMSTR[n]
                 {
-                 $$ = new cbl_domain_t(@n, false, strlen($n.string), $n.string, true);
+                 $$.domain = new cbl_domain_t(false,
+                                               strlen($n.string), $n.string, true);
+                  $$.encoding = current_encoding('A');
                 }
                 ;
 when_set_to:    %empty
@@ -3096,7 +3159,7 @@ when_set_to:    %empty
         |       WHEN SET TO
         ;
 
-data_div:       %empty
+data_div:       %empty 
         |       DATA_DIV
         |       DATA_DIV { current_division = data_div_e; } data_sections
                 {
@@ -3146,8 +3209,9 @@ fd_clause:      record_desc
                   auto f = cbl_file_of(symbol_at(file_section_fd));
                   f->varying_size.min = $1.min;
                   f->varying_size.max = $1.max;
-                  auto& cap = cbl_field_of(symbol_at(f->default_record))->data.capacity;
-                  cap = std::max(cap, uint32_t(f->varying_size.max));
+                  auto& data = cbl_field_of(symbol_at(f->default_record))->data;
+                  data.capacity( std::max(data.capacity(),
+                                          uint32_t(f->varying_size.max)) );
                   // If min != max now, we know varying is explicitly defined.
                   f->varying_size.explicitly = f->varies();
                   if( f->varying_size.max != 0 ) {
@@ -3225,6 +3289,10 @@ fd_clause:      record_desc
                   f->attr |= external_e;
                   cbl_unimplemented("AS LITERAL");
                 }
+        |       is error
+                {
+                  error_msg(@1, "invalid FD phrase");
+                }
         |       fd_linage { cbl_unimplemented("LINAGE"); }
         |       fd_report {
                   cbl_unimplemented("REPORT WRITER");
@@ -3456,6 +3524,7 @@ field:          cdf
                   }
                   field_done();
 
+#if 0
                   const auto& field(*$data_descr);
 
                   // Format data.initial per picture
@@ -3492,6 +3561,7 @@ field:          cdf
                       }
                     }
                   }
+#endif
                 }
                 ;
 
@@ -3651,33 +3721,46 @@ data_descr:     data_descr1
                 ;
 
 const_value:    cce_expr
-        |       BYTE_LENGTH of name { set_real_from_capacity(@name, $name, &$$); }
-        |       LENGTH      of name { set_real_from_capacity(@name, $name, &$$); }
-        |       LENGTH_OF   of name { set_real_from_capacity(@name, $name, &$$); }
+        |       BYTE_LENGTH of name {
+                  $$.s = nullptr;
+                  set_real_from_capacity(@name, $name, &$$.r);
+                }
+        |       LENGTH      of name { 
+                  $$.s = nullptr;
+                  set_real_from_capacity(@name, $name, &$$.r);
+                }
+        |       LENGTH_OF   of name { 
+                  $$.s = nullptr;
+                  set_real_from_capacity(@name, $name, &$$.r);
+                }
         |       LENGTH_OF   of binary_type[type] {
-                               real_from_integer(&$$, VOIDmode, $type, SIGNED); }
+                  $$.s = nullptr;
+                  real_from_integer(&$$.r, VOIDmode, $type, SIGNED);
+                }
                 ;
 
 value78:        literalism
                 {
                   cbl_field_data_t data;
-                 data.capacity = capacity_cast(strlen($1.data));
-                  data.initial = $1.data;
+                  data.capacity( capacity_cast(strlen($1.data)) );
+                  data.original($1.data);
                   $$.encoding = $1.encoding;
                   $$.data = new cbl_field_data_t(data);
                 }
         |       const_value
                 {
                   cbl_field_data_t data;
-                 data = build_real (float128_type_node, $1);
-                  $$.encoding = current_encoding('A');
+                 data = build_real (float128_type_node, $1.r);
+                  auto s = $1.s ? $1.s : reinterpret_cast<char*>(data.etc.value);
+                  data.original(s);
+                  $$.encoding = no_encoding_e;
                   $$.data = new cbl_field_data_t(data);
                 }
         |       reserved_value[value]
                 {
-                 const auto field = constant_of(constant_index($value));
+                 const auto figconst = constant_of(constant_index($value));
                   $$.encoding = current_encoding('A');
-                  $$.data = new cbl_field_data_t(field->data);
+                  $$.data = new cbl_field_data_t(figconst->data);
                 }
 
         |       true_false
@@ -3695,7 +3778,7 @@ data_descr1:    level_name
                   }
                 }
 
-        |       level_name CONSTANT is_global as const_value
+        |       level_name CONSTANT is_global as const_value[cce]
                 {
                   cbl_field_t& field = *$1;
                   if( field.level != 1 ) {
@@ -3706,12 +3789,15 @@ data_descr1:    level_name
                   field.attr |= constant_e;
                   if( $is_global ) field.attr |= global_e;
                   field.type = FldLiteralN;
-                 field.data = build_real (float128_type_node, $const_value);
-                  field.data.initial = string_of($const_value);
+                 field.data = build_real (float128_type_node, $cce.r);
+                  const char *s = $cce.s? $cce.s : string_of($cce.r);
+                  field.data.original( s );
                   field.codeset.set();
+                  field.set_initial(@cce);
 
-                  if( !cdf_value(field.name, cdfval_t($const_value)) ) {
-                    error_msg(@1, "%s was defined by CDF", field.name);
+                  if( cdf_value(field.name) ) {
+                    cbl_message(@1, Par78CdfDefinedW,
+                                "%s was defined by CDF", field.name);
                   }
                 }
 
@@ -3727,6 +3813,8 @@ data_descr1:    level_name
                   field.type = FldLiteralA;
                  auto fig = constant_of(constant_index($value));
                   field.data = fig->data;
+                  field.codeset.set();
+                  field.set_initial(@value);
                 }
 
         |       level_name CONSTANT is_global as literalism[lit]
@@ -3735,66 +3823,74 @@ data_descr1:    level_name
                   field.attr |= constant_e;
                   if( $is_global ) field.attr |= global_e;
                   field.type = FldLiteralA;
-                  field.data.capacity = $lit.len;
-                  field.data.initial  = $lit.data;
                   field.attr |= literal_attr($lit.prefix);
+
+                  if( ! field.codeset.set($lit.encode_as()) ) {
+                    error_msg(@lit, "CONSTANT inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field.codeset.encoding));
+                  }
+                  field.data.original( $lit.data );
+                  field.set_initial(@lit);
+
                   if( field.level != 1 ) {
                     error_msg(@lit, "%s must be an 01-level data item", field.name);
                     YYERROR;
                   }
-                  if( !cdf_value(field.name, $lit.data) ) {
-                    error_msg(@1, "%s was defined by CDF", field.name);
-                  }
-                  if( ! field.codeset.set() ) {
-                    error_msg(@lit, "CONSTANT inconsistent with encoding %s",
-                              cbl_alphabet_t::encoding_str(field.codeset.encoding));
+                  if( cdf_value(field.name) ) {
+                    cbl_message(@1, Par78CdfDefinedW,
+                                "%s was defined by CDF", field.name);
                   }
-                  
-                  value_encoding_check(@lit, $1);
                 }
         |       level_name CONSTANT is_global FROM NAME
                 {
                   assert($1 == current_field());
+                  cbl_field_t& field(*$1);
+                  if( cdf_value(field.name) ) {
+                    cbl_message(@1, Par78CdfDefinedW,
+                                "%s was defined by CDF", field.name);
+                  }
                   const cdfval_t *cdfval = cdf_value($NAME);
                   if( !cdfval ) {
-                    error_msg(@1, "%s was defined by CDF", $NAME);
+                    error_msg(@NAME, "%s was not defined by CDF", $NAME);
                     YYERROR;
                   }
-                  cbl_field_t& field = *$1;
                   field.attr |= ($is_global | constant_e);
-                  field.data.capacity = cdfval->string ? strlen(cdfval->string)
-                                                  : sizeof(field.data.value_of());
-                  field.data.initial  = cdfval->string;
-                  field.data = cdfval->number;
-                  if( !cdf_value(field.name, *cdfval) ) {
-                    error_msg(@1, "%s was defined by CDF", field.name);
+                  field.codeset.set();
+                // Does a const field want an initial string for a numeric value? --jkl
+                  if( cdfval->string ) {
+                    field.data.original( cdfval->string );
+                    field.set_initial(@NAME);
+                  } else {
+                    field.data.capacity(sizeof(field.data.value_of()));
+                    field.data = cdfval->number;
                   }
                 }
-
         |       LEVEL78 NAME[name] VALUE is value78[data]
                 {
                   dialect_ok(@1, MfLevel78, "LEVEL 78");
                   cbl_field_t field = { FldLiteralA, constant_e, *$data.data,
                                         78, $name, @name.first_line };
-                  if( field.data.initial ) {
-                    field.attr |= quoted_e;
-                    field.codeset.set($data.encoding);
-                    if( !cdf_value(field.name, field.data.initial) ) {
-                      cbl_message(Par78CdfDefinedW,
-                                  "%s was defined by CDF", field.name);
-                    }
-                  } else {
+                  // cce reports no encoded initial value
+                  if( $data.encoding == no_encoding_e ) { 
                     field.type = FldLiteralN;
+                    field.codeset.set();
                     field.data.initial = string_of(field.data.value_of());
+                    if( cdf_value(field.name) ) {
+                      cbl_message(@name, Par78CdfDefinedW,
+                                  "%s was defined by CDF", field.name);
+                    }
+                  } else{ 
+                    field.attr |= quoted_e;
                     field.codeset.set($data.encoding);
-                    if( !cdf_value(field.name, field.as_integer()) ) {
-                      cbl_message(Par78CdfDefinedW,
+                    field.set_initial(@data);
+                    if( cdf_value(field.name) ) {
+                      cbl_message(@name, Par78CdfDefinedW,
                                   "%s was defined by CDF", field.name);
                     }
                   }
+
                   if( ($$ = field_add(@name, &field)) == NULL ) {
                     error_msg(@name, "failed level 78");
-                    YYERROR;
                   }
                 }
 
@@ -3807,7 +3903,7 @@ data_descr1:    level_name
                   auto fig = constant_of(constant_index(NULLS))->data.initial;
                   struct cbl_domain_t *domain = new cbl_domain_t[2];
 
-                  domain[0] = cbl_domain_t(@NAME, false, strlen(fig), fig);
+                  domain[0] = cbl_domain_t(false, strlen(fig), fig);
 
                   field.data.domain_as(domain);
 
@@ -3829,17 +3925,39 @@ data_descr1:    level_name
                   cbl_domain_t *domain =
                     new cbl_domain_t[ domains.size() + 1];
 
+                  auto enc = domains.front().encoding;
+                  auto p = std::find_if( domains.begin(), domains.end(),
+                                         [enc]( const auto& dom ) {
+                                           return !dom.encoding_ok(enc);
+                                         } );
+                  if( p != domains.end() ) {
+                    error_msg( @domains, "%qs has encoding %qs "
+                               "but value %qs has encoding %qs",
+                               $NAME, 
+                               current_t::cbl_encoding_str(domains.front().encoding),
+                               p->first.name(), 
+                               current_t::cbl_encoding_str(p->encoding) );
+                  }
                   std::copy(domains.begin(), domains.end(), domain);
+                  domains.clear();
 
                   field.data.domain_as(domain);
-                  field.data.false_value_as($domains);
+                  field.data.false_value_as($domains.domain);
                   field.codeset.set();
-                  domains.clear();
 
                   if( ($$ = field_add(@2, &field)) == NULL ) {
                     error_msg(@NAME, "failed level 88");
                     YYERROR;
                   }
+                  cbl_field_t *parent = parent_of($$);
+                  if( parent->codeset.valid() &&
+                      enc != parent->codeset.encoding ) {
+                    error_msg( @NAME, "%qs has encoding %qs "
+                               "but values have encoding %qs",
+                               $NAME, 
+                               current_t::cbl_encoding_str(parent->codeset.encoding),
+                               current_t::cbl_encoding_str(enc) );
+                  } 
                 }
 
         |       name66[alias] RENAMES name[orig]
@@ -3941,14 +4059,20 @@ data_descr1:    level_name
         |       level_name[field] data_clauses
                 {
                   gcc_assert($field == current_field());
+                  //// cbl_field_t& F(*$field);
+
                   if( $data_clauses == value_clause_e ) { // only VALUE, no PIC
                     // Error unless VALUE is a figurative constant or (quoted) string.
-                    if( $field->type != FldPointer &&
-                        ! $field->has_attr(quoted_e) &&
-                        normal_value_e == cbl_figconst_of($field->data.initial) )
-                    {
-                      error_msg(@field, "%s numeric VALUE %s requires PICTURE",
-                               $field->name, $field->data.initial);
+                    if( $field->type != FldPointer && ! $field->has_attr(quoted_e) ) {
+                      switch( cbl_figconst_of($field->data.initial) ) {
+                      case normal_value_e:
+                      case zero_value_e:
+                        break;
+                      default:
+                        error_msg(@field, "%qs numeric VALUE %qs requires PICTURE",
+                                  $field->name, $field->data.original());
+                        YYERROR; // do not attempt to set capacity
+                      }
                     }
                     if( null_value_e == cbl_figconst_of($field->data.initial) ) {
                       // don't change the type
@@ -3958,7 +4082,7 @@ data_descr1:    level_name
                       assert(FldPointer != $field->type);
                       $field->type = FldAlphanumeric;
                       if( $field->data.initial ) {
-                        $field->data.capacity = strlen($field->data.initial);
+                        $field->set_capacity(strlen($field->data.initial));
                       }
                     }
                   }
@@ -4000,65 +4124,104 @@ data_descr1:    level_name
 
                   // Increase numeric display capacity by 1 for SIGN SEPARATE.
                   if( $field->type == FldNumericDisplay &&
-                      is_signable($field) &&
-                      $field->has_attr(separate_e) ){
-                    $field->data.capacity++;
+                      $field->has_attr(separate_e) ) {
+                    gcc_assert(is_signable($field));
+                    $field->add_capacity(1);
+                    $field->blank_initial($field->char_capacity());
                   }
 
                   // Set Packed-Decimal capacity
                   if( $field->type == FldPacked ) {
-                    $field->data.capacity = type_capacity($field->type,
-                                                          $field->data.digits);
-                  if( $field->attr & separate_e )
-                    {
-                    // This is a gentle kludge required by the the belated
-                    // introduction of COMP-6, which is like COMP-3 but with no
-                    // sign nybble.  The code in type_capacity assumes a sign
-                    // nybble.
-                    $field->data.capacity = ($field->data.digits+1)/2;
-                    }
+                    // COMP-6 is like COMP-3 but with no sign nybble.  The
+                    // code in type_capacity assumes a sign nybble.
+                    auto one_less = $field->has_attr(separate_e) ? 1 : 0;
+
+                    $field->set_capacity(type_capacity($field->type,
+                                                       $field->data.digits - one_less));
                   }
 
                   // Check COMP-5 capacity
                  // No capacity means no PICTURE, valid only for a (potential) group
-                  if( $field->type == FldNumericBin5 && $field->data.capacity == 0 ) {
-                    if(  has_clause ($data_clauses, usage_clause_e) &&
-                        !has_clause ($data_clauses, picture_clause_e) ) {
-                       // invalidate until a child is born
-                     $field->type = FldInvalid;
+                  if( $field->type == FldNumericBin5 ) {
+                    if( $field->data.capacity() == 0 ) {
+                      if(  has_clause ($data_clauses, usage_clause_e) &&
+                          !has_clause ($data_clauses, picture_clause_e) ) {
+                         // invalidate until a child is born
+                       $field->type = FldInvalid;
+                      }
+                    } else {
+                      if( ($field->data.initial) ) {
+                        if( strlen($field->data.initial) < $field->data.capacity() ) {
+                          $field->blank_initial( $field->data.capacity() );
+                        }
+                      }
                     }
                   }
 
                   // Ensure signed initial VALUE is for signed numeric type
                   if( is_numeric($field) ) {
-                    if( $field->data.initial && $field->type != FldFloat ) {
-                      switch( $field->data.initial[0] ) {
+                    auto original = $field->data.original();
+                    if( original && $field->type != FldFloat ) {
+                      switch( original[0] ) {
                       case '-':
                         if( !$field->has_attr(signable_e) ) {
                           error_msg(@field, "%s is unsigned but has signed VALUE '%s'",
-                                    $field->name, $field->data.initial);
+                                    $field->name, original);
                         }
                       }
                     }
                   }
 
                   // Verify VALUE
-                  $field->report_invalid_initial_value(@data_clauses);
-
-                  bool numerical =
-                      $field->type == FldNumericDisplay || is_numeric($field);
-
-                  if( $field->data.initial && ! numerical ) {
-                    if( normal_value_e == cbl_figconst_of($field->data.initial) ) {
-                      value_encoding_check(@data_clauses, $field);
-                    }
-                  }
+                  bool bad_value = $field->report_invalid_initial_value(@data_clauses);
 
                   // verify REDEFINES
                   const auto parent = parent_of($field);
                   if( parent && $field->level == parent->level ) {
                     valid_redefine(@field, $field, parent); // calls yyerror
                   }
+                  
+                  // verify VALUE for Numeric Edited
+                  if( $field->type == FldNumericEdited ) {
+                    if( !validate_numeric_edited ($field) ) {
+                      error_msg(@data_clauses, "%s: PICTURE and VALUE are incompatible",
+                                $field->name);
+                    }
+                  }
+
+                  // verify VALUE for numeric
+                  if ($data_clauses & value_clause_e) {
+                    bool good_value =
+                        ($field->data.etc_ok() || $field->data.original()) && ! bad_value;
+                    if( good_value ) { // ensure VALUE had a value
+                      if( is_numeric($field) || $field->type == FldNumericEdited ) {
+                        if( zero_value_e == cbl_figconst_of($field->data.original()) ) {
+                          $field->blank_initial($field->char_capacity());
+                        }
+                        $field->encode_numeric($field->data.original(), 
+                                               data_clause_locations[value_clause_e],
+                                               $field->data.original_numeric());
+                      }
+                    }
+                  } else { // no VALUE clause
+                    if( false && $field->data.initial ) {
+                      free(const_cast<char*>($field->data.initial));
+                      $field->data.initial = nullptr;
+                    }                      
+                  }
+
+                  // Any field may become a group, so may have VALUE with no PICTURE
+                  const auto stooges3 = (picture_clause_e |
+                                         value_clause_e |
+                                         usage_clause_e);
+                  if( ($data_clauses & stooges3) == value_clause_e ) { // only
+                    $field->type = FldInvalid;
+                    auto fig = cbl_figconst_of($field->data.original());
+                    if( null_value_e != fig ) {
+                      $field->set_initial( $field->data.capacity(),
+                                           data_clause_locations[value_clause_e]);
+                    }
+                  }
                 }
                 ;
 
@@ -4136,6 +4299,12 @@ data_clauses:   data_clause
                     YYERROR;
                   }
 
+                 // We could be more judicious. We could clear the map when
+                 // the first clause is encountered, and e.g. set the location
+                 // to just the VALUE string, not the whole clause.  As of now
+                 // the map isn't used, though.
+                  data_clause_locations[data_clause_t($2)] = @data_clause;
+
                   if( $data_clause == redefines_clause_e ) {
                     error_msg(@2, "REDEFINES must appear "
                              "immediately after LEVEL and NAME");
@@ -4185,7 +4354,7 @@ data_clauses:   data_clause
                   }
 
                   if( gcobol_feature_embiggen() ) {
-                    if( field->is_binary_integer() && field->data.capacity == 4) {
+                    if( field->is_binary_integer() && field->data.capacity() == 4) {
                       auto redefined = symbol_redefines(field);
                       if( redefined && redefined->type == FldPointer ) {
                         dbgmsg("expanding %s size from %u bytes to %lu "
@@ -4248,23 +4417,14 @@ data_clause:    any_length        { $$ = any_length_e; }
         |       value_clause      { $$ = value_clause_e;
                   cbl_field_t *field = current_field();
 
-                  if( field->type != FldAlphanumeric &&
-                      field->data.initial && field->data.initial[0] )
-                  {
-                    // Embedded NULs are valid only in FldAlphanumeric, and are
-                    // already handled.
-                    if( strlen(field->data.initial) < field->data.capacity ) {
-                      auto p = blank_pad_initial( field->data.initial,
-                                                  strlen(field->data.initial),
-                                                  field->data.capacity );
-                      if( !p ) YYERROR;
-                      field->data.initial = p;
-                    }
-                  }
                  const cbl_field_t *parent;
+                 if( (parent = parent_has_picture(field)) != NULL ) {
+                   error_msg(@1, "VALUE invalid because group %s (%s) has PICTURE clause",
+                             parent->name, 3 + cbl_field_type_str(parent->type));
+                 }
                  if( (parent = parent_has_value(field)) != NULL ) {
-                   error_msg(@1, "VALUE invalid because group %s has VALUE clause",
-                             parent->name);
+                   error_msg(@1, "VALUE invalid because group %s (%s) has VALUE clause",
+                             parent->name, 3 + cbl_field_type_str(parent->type));
                  }
                 }
         |       volatile_clause      { $$ = volatile_clause_e; }
@@ -4273,19 +4433,24 @@ data_clause:    any_length        { $$ = any_length_e; }
 picture_clause: PIC signed nps[fore] nines nps[aft]
                 {
                   cbl_field_t *field = current_field();
+                  if( ! field->codeset.set() ) {
+                    error_msg(@nines, "PICTURE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                  }
                   if( !field_type_update(field, FldNumericDisplay, @$) ) {
                     YYERROR;
                   }
                   ERROR_IF_CAPACITY(@PIC, field);
-                  field->attr |= $signed;
-                  field->data.capacity = type_capacity(field->type, $4);
-                  field->data.digits = $4;
-                  if( long(field->data.digits) != $4 ) {
-                    error_msg(@2, "indicated size would be %d bytes, "
-                             "maximum data item size is %u",
-                             $4, UINT32_MAX);
+                  // If signable_e is inherited from the group, it is effective
+                  // regardless of an 'S' in PICTURE.
+                  if( field->has_attr(signable_e) && ! $signed ) {
+                    dbgmsg("%s PICTURE must be signed for SIGN IS", field->name);
                   }
-
+                  field->attr |= $signed;
+                  field->data.digits = $nines;
+                  auto nchar = type_capacity(field->type, $nines);
+                  field->set_capacity(nchar);
+                  field->blank_initial(nchar);
                   if( $fore && $aft ) { // leading and trailing P's
                     error_msg(@2, "PIC cannot have both leading and trailing P");
                     YYERROR;
@@ -4296,59 +4461,76 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                   }
                   if( ! field->reasonable_capacity() ) {
                     error_msg(@2, "%s limited to capacity of %d (would need %u)",
-                            field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity);
+                            field->name, MAX_FIXED_POINT_DIGITS, field->char_capacity());
                   }
                 }
 
         |       PIC signed NINEV[left] nine[rdigits]
                 {
                   cbl_field_t *field = current_field();
+                  if( ! field->codeset.set() ) {
+                    error_msg(@$, "PICTURE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                  }
                   field->data.digits = $left + $rdigits;
                   field->attr |= $signed;
 
                   if( field->is_binary_integer() ) {
-                    field->data.capacity = type_capacity(field->type,
-                                                         field->data.digits);
+                    field->set_capacity(type_capacity(field->type,
+                                                      field->data.digits));
                     field->data.rdigits = $rdigits;
                   } else {
                     if( !field_type_update(field, FldNumericDisplay, @$) ) {
                       YYERROR;
                     }
                     ERROR_IF_CAPACITY(@PIC, field);
-                    field->data.capacity = field->data.digits;
+                    field->set_capacity(field->data.digits);
                     field->data.rdigits = $rdigits;
                   }
+                  // data.initial has blanks for character-encoded data
+                 // data.capacity may reflect the binary size, if any. 
+                  field->blank_initial(field->data.digits);
+
                   if( ! field->reasonable_capacity() ) {
                     error_msg(@2, "%s limited to capacity of %d (would need %u)",
-                            field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity);
+                            field->name, MAX_FIXED_POINT_DIGITS, field->char_capacity());
                   }
                 }
         |       PIC signed NINEDOT[left] nine[rdigits]
                 {
+                  cbl_field_t *field = current_field();
+                  if( ! field->codeset.set() ) {
+                    error_msg(@$, "PICTURE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                  }
+
                   uint32_t size = $left + $rdigits;
 
-                  cbl_field_t *field = current_field();
                   if( !field_type_update(field, FldNumericEdited, @$) ) {
                     YYERROR;
                   }
                   ERROR_IF_CAPACITY(@PIC, field);
                   field->attr |= $signed;
                   field->data.digits = size;
-                  field->data.capacity = ++size;
+                  field->set_capacity(++size);
                   field->data.rdigits = $rdigits;
+                  field->blank_initial(size);
 
                   if( ! field->reasonable_capacity() ) {
                     error_msg(@2, "%s limited to capacity of %d (would need %u)",
-                            field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity);
+                            field->name, MAX_FIXED_POINT_DIGITS, field->char_capacity());
                   }
-                }
+                } 
 
-        |       PIC alphanum_pic[size]
+        |       PIC alphanum_pic[nchar]
                 {
                   cbl_field_t *field = current_field();
-
+                  if( ! field->codeset.valid() ) { // set by the picture
+                    dbgmsg("%s:%d: %s has invalid encoding",
+                           __FILE__, __LINE__, field->name);
+                  }
                  if( field->type == FldNumericBin5 &&
-                     field->data.capacity == 0xFF  &&
+                     field->data.capacity() == 0xFF  &&
                      dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE") )
                  { // PIC X COMP-X or COMP-9
                    if( ! field->has_attr(all_x_e) ) {
@@ -4361,29 +4543,24 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                       YYERROR;
                     }
                  }
-                  assert(0 < $size);
-                  if( field->data.initial != NULL ) {
-                    if( 0 < field->data.capacity &&
-                            field->data.capacity < uint32_t($size) ) {
-                      auto p = blank_pad_initial(field->data.initial,
-                                                 field->data.capacity, $size );
-                      if( !p ) YYERROR;
-                      field->data.initial = p;
-                    }
+                  assert(0 < $nchar);
+                  field->data.picture = nullptr;
+                  auto nchar = std::min($nchar, MAXIMUM_ALPHA_LENGTH);
+                  if( nchar < $nchar ) {
+                    error_msg(@2, "alphanumeric data-item size (%d) "
+                                   "exceeds maximum of %d bytes",
+                              $nchar, MAXIMUM_ALPHA_LENGTH);
                   }
-
-                  charmap_t *charmap = 
-                                    __gg__get_charmap(field->codeset.encoding);
-                  field->data.capacity = $size * charmap->stride();
-                  field->data.picture = NULL;
-
-                  if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s",
-                                      field->line, field_str(field));
+                  field->set_initial(nchar, @nchar);
                 }
 
         |       PIC numed[picture]
                 {
                   cbl_field_t *field = current_field();
+                  if( ! field->codeset.set() ) {
+                    error_msg(@picture, "PICTURE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                  }
                   if( !field_type_update(field, FldNumericEdited, @$) ) {
                     YYERROR;
                   }
@@ -4393,27 +4570,26 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                     YYERROR;
                   }
                   field->data.picture = $picture;
-                  field->data.capacity =  length_of_picture($picture);
                   field->data.digits   =  digits_of_picture($picture, false);
                   field->data.rdigits  = rdigits_of_picture($picture);
                   if( is_picture_scaled($picture) ) field->attr |= scaled_e;
+                  auto nchar = length_of_picture($picture);
+                  field->set_capacity(nchar);
+                  field->blank_initial(nchar);
                 }
 
         |       PIC ALPHED[picture]
                 {
-                  bool is_alpha_edited( const char picture[] );
-
                   cbl_field_t *field = current_field();
                   ERROR_IF_CAPACITY(@PIC, field);
-                  field->data.capacity = length_of_picture($picture);
-                  field->data.picture = $picture;
-
                   // In case the lexer guesses wrong.
                   cbl_field_type_t type = is_numeric_edited($picture)?
                                           FldNumericEdited : FldAlphaEdited;
                   if( !field_type_update(field, type, @$) ) {
                     YYERROR;
                   }
+                  field->data.picture = $picture;
+                  field->data.capacity(length_of_picture($picture));
 
                   switch( type ) {
                   case FldNumericEdited:
@@ -4422,7 +4598,7 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                     if( is_picture_scaled($picture) ) field->attr |= scaled_e;
                     break;
                   case FldAlphaEdited:
-                    if( !is_alpha_edited(field->data.picture) ) {
+                    if( ! field->data.is_alpha_edited() ) {
                       error_msg(@picture, "invalid picture for Alphanumeric-edited");
                       YYERROR;
                     }
@@ -4430,6 +4606,11 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                   default:
                     gcc_unreachable();
                   }
+                  if( ! field->codeset.set() ) {
+                    error_msg(@picture, "PICTURE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                  }
+                  field->set_initial(@picture);
                 }
        |       PIC ones
                 ;
@@ -4468,6 +4649,10 @@ alphanum_part:  ALNUM[picture] count
                   $$.attr = uniform_picture($picture);
                   $$.nbyte = strlen($picture);
                  auto count($count);
+                 if( false && count == 0 ) { // zero count has no effect
+                   error_msg(@2, "PICTURE count %<(%d)%> is zero", count );
+                   YYERROR;
+                 }
                   if( count > 0 ) {
                     --count;
                     $$.nbyte += count; // AX9(3) has count 5
@@ -4511,29 +4696,39 @@ count:          %empty           { $$ = 0; }
                 }
        |       '(' NAME ')'
                 {
+                  int64_t output = 1;
                  auto value = cdf_value($NAME);
-                 if( ! (value && value->is_numeric()) ) {
-                   error_msg(@NAME, "PICTURE %qs requires a CONSTANT value", $NAME );
-                   YYERROR;
-                 }
-                 int nmsg = 0;
-                 auto e = symbol_field(PROGRAM, 0, $NAME);
-                 if( e ) { // verify not floating point with nonzero fraction
-                   auto field = cbl_field_of(e);
-                   assert(is_literal(field));
-                   REAL_VALUE_TYPE vi;
-                   real_from_integer (&vi, VOIDmode, field->as_integer(), SIGNED);
-                   if( !real_identical (TREE_REAL_CST_PTR (field->data.value_of()),
-                                        &vi) ) {
-                     nmsg++;
-                     error_msg(@NAME, "invalid PICTURE count %<(%s)%>",
-                               field->data.initial );
-                   }
-                 }
-                 $$ = value->as_number();
-                 if( $$ <= 0 && !nmsg) {
-                   error_msg(@NAME, "invalid PICTURE count %<(%s)%>", $NAME );
-                 }
+                 if( value && ! value->is_numeric() ) {
+                   error_msg(@NAME,
+                              "PICTURE %qs requires a CONSTANT NUMERIC value",
+                              $NAME );
+                    output = value->as_number();
+                 } else {
+                   auto e = symbol_field(PROGRAM, 0, $NAME);
+                   if( ! e ) {
+                     error_msg(@NAME, "%qs not defined", $NAME );
+                    } else { // verify it's integer-like
+                     auto field = cbl_field_of(e);
+                      if( ! field->has_attr(constant_e) ) {
+                        error_msg(@NAME, "count %qs must be CONSTANT", $NAME);
+                      }
+                      if( ! is_numeric(field) ) {
+                        auto s = field->data.original();
+                        error_msg(@NAME, "%qs invalid as PICTURE count %<(%s)%>",
+                                  $NAME, s? s : "" );
+                      } else {
+                        auto ok = field->data.int64_of();
+                        if( ! ok.second ) {
+                          error_msg(@NAME, "%qs invalid as PICTURE count %<(%ld)%>",
+                                    $NAME, long(ok.first) );
+                        } else {
+                          output = ok.first;
+                        }
+                     }
+                    }
+                  }
+                  $$ = output;
+                  dbgmsg("%s:%d: count: (%s) is %ld", __FILE__, __LINE__, $NAME, long($$));
                 }
                 ;
 
@@ -4557,169 +4752,51 @@ usage_clause1:  usage BIT
                }
         |       usage BINARY_INTEGER [comp] is_signed
                 {
-                 // action for BINARY_INTEGER is repeated for COMPUTATIONAL, below. 
-                 // If it changes, consolidate in a function. 
-                  bool infer = true;
-                  cbl_field_t *field = current_field();
+                  bool signable = $is_signed? $comp.signable : false;
 
-                 if( ! $is_signed ) {
-                   $comp.signable = false;
-                 }
-
-                  // Some binary types have defined capacity;
-                  switch($comp.type) {
-                  // COMPUTATIONAL and COMP-5 rely on PICTURE.
-                  case FldNumericBinary:
-                    field->attr |= big_endian_e;
-                    __attribute__((fallthrough));
-                  case FldNumericBin5:
-                   // If no capacity yet, then no picture, infer $comp.capacity.
-                   // If field has capacity, ensure USAGE is compatible.
-                   if( field->data.capacity > 0 ) { // PICTURE before USAGE
-                     infer = false;
-                     switch( field->type ) {
-                     case FldAlphanumeric:   // PIC X COMP-5 or COMP-X
-                       assert( field->data.digits == 0 );
-                       assert( field->data.rdigits == 0 );
-                       dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
-
-                        field->type = $comp.type;
-                       field->clear_attr(signable_e);
-                        break;
-                     case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X
-                       if( $comp.capacity == 0xFF ) { // comp-x is a bit like comp-5
-                         assert( field->data.digits == field->data.capacity );
-                         dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
-                       }
-                        field->type = $comp.type;
-                        field->data.capacity = type_capacity(field->type,
-                                                             field->data.digits);
-                       break;
-                     default: break;
-                      }
-                   }
-                    break;
-                 case FldPacked: // comp-6 is unsigned comp-3
-                   assert(! $comp.signable);  // else PACKED_DECIMAL from scanner
-                   field->attr |= separate_e;
-                    dialect_ok(@2, MfComp6, "COMP-6");
-                    if( field->type == FldNumericDisplay ) {// PICTURE before USAGE
-                      infer = false;
-                      assert(field->data.capacity > 0);
-                      field->type = $comp.type;
-                      field->data.capacity = type_capacity(field->type,
-                                                           field->data.digits);
-                    }
-                   break;
-                  default:
-                    break;
-                  }
-
-                  if( infer ) {
-                    if( $comp.capacity > 0 ) {
-                      if( field->data.capacity > 0 ) {
-                        error_msg(@comp, "%s is BINARY type, incompatible with PICTURE",
-                               field->name);
-                        YYERROR;
-                      }
-                      field->data.capacity = $comp.capacity;
-                      field->type = $comp.type;
-                      if( $comp.signable ) {
-                        field->attr = (field->attr | signable_e);
-                      }
-                    }
-                  }
-                  $$ = $comp.type;
+                  $$ = field_binary_usage( @comp, current_field(), 
+                                           $comp.type, $comp.capacity,
+                                           signable );
                 }
+
        |       usage COMPUTATIONAL[comp] native
                 { 
-                 // logic below duplicates BINARY_INTEGER, above.
-                 // If it changes, consolidate in a function. 
-                  bool infer = true;
-                  cbl_field_t *field = current_field();
-
-                  // Some binary types have defined capacity;
-                  switch($comp.type) {
-                  // COMPUTATIONAL and COMP-5 rely on PICTURE.
-                  case FldNumericBinary:
-                    field->attr |= big_endian_e;
-                    __attribute__((fallthrough));
-                  case FldNumericBin5:
-                   // If no capacity yet, then no picture, infer $comp.capacity.
-                   // If field has capacity, ensure USAGE is compatible.
-                   if( field->data.capacity > 0 ) { // PICTURE before USAGE
-                     infer = false;
-                     switch( field->type ) {
-                     case FldAlphanumeric:   // PIC X COMP-5 or COMP-X
-                       assert( field->data.digits == 0 );
-                       assert( field->data.rdigits == 0 );
-                       dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
-                        field->type = $comp.type;
-                       field->clear_attr(signable_e);
-                        break;
-                     case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X
-                       if( $comp.capacity == 0xFF ) { // comp-x is a bit like comp-5
-                         assert( field->data.digits == field->data.capacity );
-                          dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
-                       }
-                        field->type = $comp.type;
-                        field->data.capacity = type_capacity(field->type,
-                                                             field->data.digits);
-                       break;
-                     default: break;
-                      }
-                   }
-                    break;
-                 case FldPacked: // comp-6 is unsigned comp-3
-                   assert(! $comp.signable);  // else PACKED_DECIMAL from scanner
-                   field->attr |= separate_e;
-                    dialect_ok(@2, MfComp6, "COMP-6");
-                    if( field->type == FldNumericDisplay ) {// PICTURE before USAGE
-                      infer = false;
-                      assert(field->data.capacity > 0);
-                      field->type = $comp.type;
-                      field->data.capacity = type_capacity(field->type,
-                                                           field->data.digits);
-                    }
-                   break;
-                  default:
-                    break;
-                  }
-
-                  if( infer ) {
-                    if( $comp.capacity > 0 ) {
-                      if( field->data.capacity > 0 ) {
-                        error_msg(@comp, "%s is BINARY type, incompatible with PICTURE",
-                               field->name);
-                        YYERROR;
-                      }
-                      field->data.capacity = $comp.capacity;
-                      field->type = $comp.type;
-                      if( $comp.signable ) {
-                        field->attr = (field->attr | signable_e);
-                      }
-                    }
+                  $$ = field_binary_usage( @comp, current_field(), 
+                                           $comp.type, $comp.capacity,
+                                           $comp.signable );
+                } 
+        |       usage DISPLAY         native {
+                  auto field = current_field();
+                  if( ! field->codeset.set() ) {
+                    error_msg(@2, "USAGE DISPLAY conflicts with PICTURE");
                   }
-                  $$ = $comp.type;
+                  $$ = FldDisplay;
+                }
+        |       usage PACKED_DECIMAL  native {
+                  cbl_field_t *field = current_field();
+                 if( field->data.capacity() > 0 &&
+                     field->type != FldNumericDisplay) {
+                   error_msg(@2, "USAGE PACKED DECIMAL conflicts with PICTURE");
+                   YYERROR;
+                 }
+                  $$ = FldPacked;
                 }
-        |       usage DISPLAY         native { $$ = FldDisplay; }
-        |       usage PACKED_DECIMAL  native { $$ = FldPacked; }
         |       usage PACKED_DECIMAL  with NO SIGN
                {
                   cbl_field_t *field = current_field();
-                 if( field->data.capacity > 0 &&
+                 if( field->data.capacity() > 0 &&
                      field->type != FldNumericDisplay) {
-                   error_msg(@2, "%s PICTURE is incompatible with USAGE PACKED DECIMAL",
-                            field->name);
+                   error_msg(@2, "USAGE PACKED DECIMAL conflicts with PICTURE");
                    YYERROR;
                  }
+                 if( field->has_attr(signable_e) ) {
+                    error_msg(@$, "signed PICTURE conflicts with NO SIGN");
+                  }
+                 if( field->has_attr(separate_e) ) {
+                    error_msg(@$, "SIGN clause conflicts with NO SIGN");
+                  }
                  field->clear_attr(separate_e);
                  field->clear_attr(signable_e);
-                  if( field->type == FldNumericDisplay ) {// PICTURE before USAGE
-                    assert(field->data.capacity > 0);
-                    field->data.capacity = type_capacity(FldPacked,
-                                                         field->data.digits);
-                  }
                  $$ = field->type = FldPacked;
                }
         |       usage INDEX                  {
@@ -4727,10 +4804,11 @@ usage_clause1:  usage BIT
                 }
         |       usage NATIONAL {
                   auto field = current_field();
-                  if( ! field->codeset.set(EBCDIC_e) ) {
-                    error_msg(@2, "usage NATIONAL conflicts with PICTURE");
+                  auto encoding = current_encoding('N');
+                  if( ! field->codeset.set(encoding) ) {
+                    error_msg(@2, "USAGE NATIONAL conflicts with PICTURE");
                   }
-                  $$ = FldInvalid;
+                  $$ = FldDisplay;
                 } 
                 // We should enforce data/code pointers with a different type.
         |       usage POINTER
@@ -4744,12 +4822,12 @@ usage_clause1:  usage BIT
                  }
                   if( gcobol_feature_embiggen() && redefined &&
                       is_numeric(redefined->type) && redefined->size() == 4) {
-                    // For now, we allow POINTER to expand a 32-bit item to 64 bits.
-                    field->data.capacity = int_size_in_bytes(ptr_type_node);
+                    // Allow POINTER to expand a 32-bit item to 64 bits.
+                    field->data.capacity(int_size_in_bytes(ptr_type_node));
                     dbgmsg("%s: expanding #" HOST_SIZE_T_PRINT_UNSIGNED
                           " %s capacity %u => %u", __func__,
                           (fmt_size_t)field_index(redefined), redefined->name,
-                          redefined->data.capacity, field->data.capacity);
+                          redefined->data.capacity(), field->data.capacity());
 
                     redefined->embiggen();
 
@@ -4784,71 +4862,72 @@ value_clause:   VALUE all LITERAL[lit] {
                               $lit.prefix, $lit.data);
                   }
 
-                  field->data.initial  = $lit.data;
                   field->attr |= literal_attr($lit.prefix);
                   field->attr |= quoted_e;
 
-                  if( field->data.capacity == 0 ) {
-                    field->data.capacity = $lit.len;
-                  } else {
-                    if( $all ) {
-                      field_value_all(field);
-                    } else {
-                      if( $lit.len < field->data.capacity ) {
-                        auto p = blank_pad_initial( $lit.data, $lit.len,
-                                                    field->data.capacity );
-                        if( !p ) YYERROR;
-                        field->data.initial = p;
-                      }
+                  auto capacity = field->data.capacity();
+                  field->data.original($lit.data, $all); // oops, sets capacity to strlen
+
+                  if( capacity ) { // via PICTURE or USAGE
+                    field->data.capacity(capacity); // restore it (unnecessary)
+                    if (field->data.initial) { // PICTURE created blank initial
+                      field->encode($lit.len, @lit);
                     }
                   }
                 }
-        |       VALUE all cce_expr[value] {
+        |       VALUE all cce_expr[cce] {
+                 /*
+                   * cce has two parts: 
+                  * cce.r) Host binary value
+                  * cce.s) string value, if supplied as numeric literal 
+                   */
                   cbl_field_t *field = current_field();
-                  auto orig_str = original_number();
-                 REAL_VALUE_TYPE orig_val;
-                 real_from_string3 (&orig_val, orig_str,
-                                    TYPE_MODE (float128_type_node));
-                  char *initial = NULL;
-
-                  if( real_identical (&orig_val, &$value) ) {
-                    initial = orig_str;
-                    pristine_values.insert(initial);
+                  if( $cce.s ) {
+                    field->data.original($cce.s, $all);
                   } else {
-                    initial = string_of($value);
-                    gcc_assert(initial);
+                    field->data.original($cce.r);
                   }
-
-                  char decimal = symbol_decimal_point();
-                  std::replace(initial, initial + strlen(initial), '.', decimal);
-
-                  field->data.initial = initial;
-                  field->data = build_real (float128_type_node, $value);
-
-                  if( $all ) field_value_all(field);
                 }
         |       VALUE all reserved_value[value]
                 {
                   cbl_field_t *field = current_field();
-                  if( ! field->codeset.set() ) {
-                    error_msg(@value, "VALUE inconsistent with encoding %s",
-                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                  field->codeset.set();
+                  assert( $value != NULLS );
+                  auto fig = constant_of(constant_index($value));
+                  auto orig_str = fig->data.initial;
+                  auto capacity = field->data.capacity();
+                  field->data.original(orig_str, $all);
+
+                  if( capacity ) { // via PICTURE or USAGE
+                    field->data.capacity(capacity); // restore it
+                    if (field->data.initial) { // PICTURE created blank initial
+                      field->encode( strlen(orig_str), @value );
+                    }
                   }
-                  if( $value != NULLS ) {
-                    auto fig = constant_of(constant_index($value));
-                    cbl_field_t *field = current_field();
-                    field->data.initial = fig->data.initial;
+                  if(  $value != ZERO ) {
+                    if( ($value != SPACES && field->type == FldNumericEdited)
+                      || is_numeric(field) )
+                    {
+                      error_msg(@value,
+                                "unexpected initial value for %s",
+                                cbl_field_type_name(field->type));
+                    }
                   }
                 }
         |       /* VALUE is */ NULLPTR
                 {
-                    auto fig = constant_of(constant_index(NULLS));
-                    cbl_field_t *field = current_field();
-                    field->data.initial = fig->data.initial;
+                  auto fig = constant_of(constant_index(NULLS));
+                  cbl_field_t *field = current_field();
+                  auto capacity = field->data.capacity();
+                  field->data.original(fig->data.initial);
+                  if( capacity ) { // via PICTURE or USAGE
+                    field->data.capacity(capacity); // restore it
+                    // do not encode
+                  }
                 }
         |       VALUE error
                 {
-                  error_msg(@2, "no valid VALUE supplied");
+                  error_msg(@2, "invalid VALUE");
                 }
                 ;
 
@@ -4936,14 +5015,15 @@ any_length:     ANY LENGTH
                   }
                   const char *prog_name = current.program()->name;
                   bool is_compat = 0 < compat_programs.count(prog_name);
-                  if( ! (field->level == 1 &&
-                         current_data_section == linkage_datasect_e &&
-                         (1 < current.program_level() ||
-                             current.program()->is_function() ||
-                              is_compat)) ) {
-                    error_msg(@1, "ANY LENGTH valid only for 01 "
-                            "in LINKAGE SECTION of a function or contained program");
-                    YYERROR;
+                  if( field->level != 1 || current_data_section != linkage_datasect_e ) {
+                    error_msg(@1, "ANY LENGTH valid only for 01 data-item "
+                                  "in LINKAGE SECTION");
+                  }
+                  if( ! current.program()->is_function() ) {
+                    if( 1 == current.program_level() && ! is_compat) {
+                        cbl_message(@$, MfAnyLength,
+                                    "ANY LENGTH not valid in outermost program");
+                    }
                   }
                   field->attr |= any_length_e;
                 }
@@ -5021,13 +5101,18 @@ same_clause:    SAME AS name
 sign_clause:    sign_is sign_leading sign_separate
                 {
                   cbl_field_t *field = current_field();
+                  if( 0 < field->data.capacity() ) { // PICTURE set size
+                    if( ! field->has_attr(signable_e) ) {
+                      error_msg(@1, "%qs must be signed for SIGN IS", field->name);
+                    }
+                  }
+                  field->set_attr(signable_e);
                   if( $sign_leading ) {
-                    field->attr |= leading_e;
+                    field->set_attr(leading_e);
                   } else {
-                    field->attr &= ~uint64_t(leading_e); // turn off in case inherited
-                    field->attr |= signable_e;
+                    field->clear_attr(leading_e); // turn off in case inherited
                   }
-                  if( $sign_separate ) field->attr |= separate_e;
+                  if( $sign_separate ) field->set_attr(separate_e);
                 }
                 ;
 sign_is:        %empty
@@ -5163,7 +5248,7 @@ procedure_use:  optional scalar {
                 }
                 ;
 by_value_arg:   scalar
-        |       LITERAL  { $$ = new_reference(new_literal($1, quoted_e)); }
+        |       LITERAL  { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
         |       reserved_value
                 {
                   $$ = new_reference(constant_of(constant_index($1)));
@@ -5559,7 +5644,7 @@ accept_except:    EXCEPTION
 
 envar:          scalar { $$ = $1; $$->field->attr |= envar_e; }
         |       LITERAL {
-                  $$ = new_reference(new_literal($1, quoted_e));
+                  $$ = new_reference(new_literal(@1, $1, quoted_e));
                   $$->field->attr |= envar_e;
                 }
                 ;
@@ -5732,7 +5817,7 @@ name88:           NAME88 {
                   name_queue.qualify(@1, $1);
                  auto namelocs( name_queue.pop() );
                  auto names( name_queue.namelist_of(namelocs) );
-                  if( ($$ = field_find(names)) == NULL ) {
+                  if( ($$ = field_find(@1, names)) == NULL ) {
                     if( procedure_div_e == current_division  ) {
                      error_msg(namelocs.back().loc,
                                "DATA-ITEM '%s' not found", names.back() );
@@ -6455,7 +6540,7 @@ rel_term: rel_term1
 rel_term1:     all LITERAL
                 {
                  $$.invert = false;
-                  $$.term = new_reference(new_literal($2, quoted_e));
+                  $$.term = new_reference(new_literal(@2, $2, quoted_e));
                   $$.term->all = $all;
                 }
         |       all spaces_etc[value]
@@ -6925,7 +7010,7 @@ name:           qname
                   auto namelocs( name_queue.pop() );
                   auto names( name_queue.namelist_of(namelocs) );
                   auto inner = namelocs.back();
-                  if( ($$ = field_find(names)) == NULL ) {
+                  if( ($$ = field_find(@1, names)) == NULL ) {
                     if( procedure_div_e == current_division  ) {
                       error_msg(inner.loc,
                                 "DATA-ITEM '%s' not found", inner.name );
@@ -6993,6 +7078,8 @@ context_word:   APPLY                   { static char s[] ="APPLY";
                                          $$ = s; } // USAGE clause and FLOAT-DECIMAL clause
         |       BLINK                  { static char s[] ="BLINK";
                                          $$ = s; } // screen description entry and SET attribute statement
+        |       BYTE                   { static char s[] ="BYTE";
+                                         $$ = s; } // CONVERT intrinsic function
         |       BYTE_LENGTH            { static char s[] ="BYTE-LENGTH";
                                          $$ = s; } // constant entry
         |       CAPACITY               { static char s[] ="CAPACITY";
@@ -7147,16 +7234,16 @@ move:           MOVE scalar TO move_tgts[tgts]
                 {
                   statement_begin(@1, MOVE);
                   if( $scalar->field->type == FldIndex ) {
-                    error_msg(@1, "%qs cannot be MOVEd because it is an %<INDEX%>",
-                            name_of($scalar->field) );
-                    YYERROR;
+                    cbl_message(@1, MfMoveIndex,
+                                "cannot MOVE %qs because it is an %<INDEX%>",
+                                name_of($scalar->field) );
                   }
                   if( !parser_move2($tgts, *$scalar) ) { YYERROR; }
                 }
         |       MOVE all literalism[input] TO move_tgts[tgts]
                 {
                   statement_begin(@1, MOVE);
-                  struct cbl_refer_t *src = new_reference(new_literal($input,
+                  struct cbl_refer_t *src = new_reference(new_literal(@input, $input,
                                                                       quoted_e));
                   src->all = $all;
                   if( !parser_move2($tgts, *src) ) { YYERROR; }
@@ -7174,8 +7261,8 @@ move:           MOVE scalar TO move_tgts[tgts]
                                             } );
 
                   if( p != $tgts->targets.end() ) {
-                    error_msg(@src, "cannot MOVE %s "
-                                   "to numeric receiving field %s",
+                    error_msg(@src, "cannot MOVE %qs "
+                                   "to numeric receiving field %qs",
                              constant_of(constant_index($src))->name,
                              field->name );
                     YYERROR;
@@ -7251,7 +7338,7 @@ move_tgt: scalar[tgt] {
                  }
                  auto litcon = field.name[0] == '_'? "literal" : "constant";
                  error_msg(@literal, "%s is a %s", value_str, litcon);
-                 $$ = NULL;
+                 $$ = nullptr;
                }
         |       error
                 {
@@ -7260,7 +7347,7 @@ move_tgt: scalar[tgt] {
                    error_at = yytext;
                    error_msg(first_line_of(@1), "invalid receiving operand");
                  }
-                 $$ = NULL;
+                 $$ = nullptr;
                 }
                 ;
 
@@ -7431,7 +7518,7 @@ num_value:      scalar // might actually be a string
                   location_set(@1);
                   $$ = new cbl_refer_t( new_tempnumeric(none_e) );
                  dialect_ok(@1, IbmLengthOf, "LENGTH OF");
-                 parser_set_numeric($$->field, $val->data.capacity);
+                 parser_set_numeric($$->field, $val->data.capacity());
                 }
         |       LENGTH_OF name[val] subscripts[subs] {
                   location_set(@1);
@@ -7441,7 +7528,7 @@ num_value:      scalar // might actually be a string
                    cbl_refer_t r1($val);
                    subscript_dimension_error( @subs, $subs->refers.size(), &r1 );
                   }
-                 parser_set_numeric($$->field, $val->data.capacity);
+                 parser_set_numeric($$->field, $val->data.capacity());
                 }
                 ;
 
@@ -7472,33 +7559,38 @@ num_value:      scalar // might actually be a string
 
 cce_expr:       cce_factor
         |       cce_expr '+' cce_expr {
-                  real_arithmetic (&$$, PLUS_EXPR, &$1, &$3);
-                  real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+                  $$.s = nullptr;
+                  real_arithmetic (&$$.r, PLUS_EXPR, &$1.r, &$3.r);
+                  real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
                 }
         |       cce_expr '-' cce_expr {
-                  real_arithmetic (&$$, MINUS_EXPR, &$1, &$3);
-                  real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+                  $$.s = nullptr;
+                  real_arithmetic (&$$.r, MINUS_EXPR, &$1.r, &$3.r);
+                  real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
                 }
         |       cce_expr '*' cce_expr {
-                  real_arithmetic (&$$, MULT_EXPR, &$1, &$3);
-                  real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+                  $$.s = nullptr;
+                  real_arithmetic (&$$.r, MULT_EXPR, &$1.r, &$3.r);
+                  real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
                 }
         |       cce_expr '/' cce_expr {
-                  real_arithmetic (&$$, RDIV_EXPR, &$1, &$3);
-                  real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+                  $$.s = nullptr;
+                  real_arithmetic (&$$.r, RDIV_EXPR, &$1.r, &$3.r);
+                  real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
                 }
-        |                '+' cce_expr %prec NEG { $$ =  $2; }
-        |                '-' cce_expr %prec NEG { $$ = real_value_negate (&$2); }
-        |                '(' cce_expr ')'  { $$ = $2; }
+        |                '+' cce_expr %prec NEG { $$.r =  $2.r; }
+        |                '-' cce_expr %prec NEG { $$.r = real_value_negate (&$2.r); }
+        |                '(' cce_expr ')'  { $$.r = $2.r; }
         ;
 
 cce_factor:     NUMSTR {
                   /* real_from_string does not allow arbitrary radix.  */
                   // When DECIMAL IS COMMA, commas act as decimal points.
                  gcc_assert($1.radix == decimal_e);
-                 auto p = $1.string, pend = p + strlen(p);
-                 std::replace(p, pend, ',', '.');
-                 real_from_string3( &$$, $1.string,
+                  $$.s = $1.string;
+                  std::string numstr($1.string);
+                 std::replace(numstr.begin(), numstr.end(), ',', '.');
+                 real_from_string3( &$$.r, numstr.c_str(),
                                     TYPE_MODE (float128_type_node) );
                 }
                 ;
@@ -7560,7 +7652,7 @@ stop:           STOP RUN exit_with
                 {
                   statement_begin(@1, STOP);
                   dialect_ok(@2, IbmStopNumber, "STOP <number>");
-                  cbl_refer_t status( new_literal($status.string, $status.radix) );
+                  cbl_refer_t status( new_literal(@status, $status.string, $status.radix) );
                   parser_see_stop_run( status, NULL );
                 }
         |       STOP LITERAL[name] // CCVS-85 && IBM syntax
@@ -7577,7 +7669,7 @@ stop:           STOP RUN exit_with
 stop_status:    status         { $$ = NULL; }
         |       status scalar  { $$ = $2; }
         |       status NUMSTR {
-                  $$ = new_reference(new_literal($2.string, $2.radix));
+                  $$ = new_reference(new_literal(@2, $2.string, $2.radix));
                 }
                 ;
 
@@ -7625,7 +7717,7 @@ arg_list:                any_arg { $$ = new refer_list_t($1); }
         |       arg_list any_arg { $1->push_back($2); $$ = $1; }
                 ;
 any_arg:        expr
-        |       LITERAL {$$ = new_reference(new_literal($1, quoted_e)); }
+        |       LITERAL {$$ = new_reference(new_literal(@1, $1, quoted_e)); }
                 ;
 
                 /*
@@ -7650,7 +7742,7 @@ signed_literal:     num_literal
                   location_set(@1);
                   $$ = new_tempnumeric(none_e);
                  dialect_ok(@1, IbmLengthOf, "LENGTH OF");
-                  parser_set_numeric($$, $val->data.capacity);
+                  parser_set_numeric($$, $val->data.capacity());
                 }
         |       LENGTH_OF name[val] subscripts[subs] {
                   location_set(@1);
@@ -7660,11 +7752,18 @@ signed_literal:     num_literal
                    cbl_refer_t r1($val);
                    subscript_dimension_error( @subs, $subs->refers.size(), &r1 );
                   }
-                  parser_set_numeric($$, $val->data.capacity);
+                  parser_set_numeric($$, $val->data.capacity());
                 }
                 ;
 
-num_literal:    NUMSTR { $$ = new_literal($1.string, $1.radix); }
+num_constant:   NUMSTR {
+                  if( $1.is_float ) {
+                    $$ = new_literal_float(@1, $1.string);
+                  } else {
+                    $$ = new_literal(@1, $1.string, $1.radix); }
+                }
+                ;
+num_literal:    num_constant
         |       ZERO   { $$ = constant_of(constant_index(ZERO)); }
                 ;
 
@@ -8222,7 +8321,7 @@ varg1a:         ADDRESS OF scalar {
                    cbl_refer_t r1($val);
                    subscript_dimension_error( @subs, $subs->refers.size(), &r1 );
                   }
-                 parser_set_numeric($$->field, $val->data.capacity);
+                 parser_set_numeric($$->field, $val->data.capacity());
                 }
                 ;
 
@@ -8235,12 +8334,9 @@ literal:        literalism
                   $$ = $1.isymbol()?
                     cbl_field_of(symbol_at($1.isymbol()))
                     :
-                    new_literal($1, quoted_e);
-                }
-        |       NUMSTR
-                {
-                  $$ = new_literal($1.string, $1.radix);
+                    new_literal(@1, $1, quoted_e);
                 }
+        |       num_constant
         |       DATETIME_FMT
                 {
                   $$ = new_literal(strlen($1), $1, quoted_e);
@@ -8546,7 +8642,7 @@ file_record:    NAME
                  auto namelocs( name_queue.pop() );
                  auto names( name_queue.namelist_of(namelocs) );
                  auto inner = namelocs.back();
-                  if( ($$ = field_find(names)) == NULL ) {
+                  if( ($$ = field_find(@1, names)) == NULL ) {
                     error_msg(inner.loc, "no record name '%s'", inner.name);
                     YYERROR;
                   }
@@ -8583,7 +8679,7 @@ advance_by:     scalar lines { $$ = $1; } /* BUG: should accept reference */
                    * number of lines is negative.  So, we use the
                    * negative Number Of The Beast as a PAGE flag.
                    */
-                  $$ = new_reference( new_literal(xstrdup("-666")) );
+                  $$ = new_reference( new_constant(xstrdup("-666")) );
                 }
         |       device_name { $$ = new_reference(literally_one); }
                 ;
@@ -9007,7 +9103,7 @@ set:            SET set_tgts[tgts] TO set_operand[src]
                   auto literal = $src.isymbol()?
                     cbl_field_of(symbol_at($src.isymbol()))
                     :
-                    new_literal($src, quoted_e);
+                    new_literal(@src, $src, quoted_e);
                   ast_set_pointers($tgts->targets, literal);
                 }
                 // Format 12 (save-locale):
@@ -9296,7 +9392,7 @@ search_term:    scalar[key] EQ search_expr[sarg]
                 }
                 ;
 search_expr:    expr
-        |       LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+        |       LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
                 ;
 
 sort:           sort_table
@@ -9308,6 +9404,7 @@ sort_table:     SORT tableref[table] sort_keys sort_dup sort_seq {
                   std::vector <cbl_key_t> keys($sort_keys->key_list.size());
                  if( ! is_table($table->field) ) {
                    error_msg(@1, "%s has no OCCURS clause", $table->field->name);
+                    YYERROR;
                  }
                   // 23) If data-name-1 is omitted, the data item referenced by
                   // data-name-2 is the key data item.
@@ -9325,6 +9422,7 @@ sort_table:     SORT tableref[table] sort_keys sort_dup sort_seq {
                   statement_begin(@1, SORT);
                  if( ! is_table($table->field) ) {
                    error_msg(@1, "%s has no OCCURS clause", $table->field->name);
+                    YYERROR;
                  }
                   cbl_key_t
                     key = cbl_key_t($table->field->occurs.keys[0]),
@@ -9601,10 +9699,11 @@ inspect:        INSPECT backward inspected TALLYING tallies
                 {
                  if( $all ) {
                    $replace_oper->all = true;
-                   if( is_literal($replace_oper->field) ) {
-                     if( $replace_oper->field->data.capacity != 1 ) {
+                    cbl_field_t *field( $replace_oper->field );
+                   if( is_literal(field) ) {
+                     if( field->data.capacity() != field->codeset.stride() ) {
                        error_msg(@all, "ALL %s must be a single character",
-                                $replace_oper->field->data.initial);
+                                  field->data.initial);
                        YYERROR;
                      }
                    } else {
@@ -9616,12 +9715,12 @@ inspect:        INSPECT backward inspected TALLYING tallies
                                *replace = $replace_oper->field;
                    if( is_literal(match) && is_literal(replace) ) {
                      if( !$match->all && !$replace_oper->all) {
-                       if( match->data.capacity != replace->data.capacity ) {
+                       if( match->data.capacity() != replace->data.capacity() ) {
                          // Make a copy of replace, because nice_name returns a static
                          char *replace_name = xstrdup(nice_name_of(replace));
                          error_msg(@match, "%qs, size %u NOT EQUAL %qs, size %u",
-                                   nice_name_of(match), match->data.capacity
-                                   replace_name, replace->data.capacity);
+                                   nice_name_of(match), match->char_capacity()
+                                   replace_name, replace->char_capacity());
                                    free(replace_name);
                          YYERROR;
                        }
@@ -9856,7 +9955,7 @@ first_leading:  FIRST      { $$ = bound_first_e; }
                }
                 ;
 
-alphaval:       LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+alphaval:       LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
         |       reserved_value
                 {
                   $$ = new_reference( constant_of(constant_index($1)) );
@@ -9966,9 +10065,7 @@ init_by:        init_category data BY init_data
                 }
                 ;
 init_data:      alpha_val
-        |       NUMSTR   {
-                  $$ = new_reference(new_literal($1.string, $1.radix));
-                }
+        |       num_constant { $$ = new_reference($1); }
                 ;
 
 call:           call_impl end_call
@@ -10048,12 +10145,12 @@ call_returning:       RETURNING
 
 entry:          ENTRY LITERAL
                 { statement_begin(@1, ENTRY);
-                  auto name = new_literal($2, quoted_e);
+                  auto name = new_literal(@2, $2, quoted_e);
                   parser_entry( name );
                 }
         |       ENTRY LITERAL USING parameters
                 { statement_begin(@1, ENTRY);
-                  auto name = new_literal($2, quoted_e);
+                  auto name = new_literal(@2, $2, quoted_e);
                   ffi_args_t *params = $parameters;
                   size_t narg = params? params->elems.size() : 0;
                  cbl_ffi_arg_t *pargs = NULL;
@@ -10086,7 +10183,7 @@ ffi_name:       scalar
                 {
                   // Pretend hex-encoded because that means use verbatim.
                   auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
-                  $$ = new_reference(new_literal($1, attr));
+                  $$ = new_reference(new_literal(@1, $1, attr));
                 }
                 ;
 
@@ -10106,6 +10203,13 @@ ffi_by_ref:     scalar_arg[refer]
                 {
                   $$ = new cbl_ffi_arg_t(by_reference_e, $refer);
                 }
+        |       LITERAL
+                {
+                  cbl_message(@1, MfCallLiteral,
+                              "cannot pass %qs BY REFERENCE", $1.data);
+                  cbl_refer_t *r = new_reference(new_literal(@1, $1, quoted_e));
+                  $$ = new cbl_ffi_arg_t(by_content_e, r);
+                }
         |       ADDRESS OF scalar_arg[refer]
                 {
                   $$ = new cbl_ffi_arg_t(by_reference_e, $refer, address_of_e);
@@ -10124,7 +10228,7 @@ ffi_by_con:     expr
                 }
         |       LITERAL
                 {
-                  cbl_refer_t *r = new_reference(new_literal($1, quoted_e));
+                  cbl_refer_t *r = new_reference(new_literal(@1, $1, quoted_e));
                   $$ = new cbl_ffi_arg_t(by_content_e, r);
                 }
         |       OMITTED
@@ -10140,7 +10244,8 @@ ffi_by_val:     by_value_arg
                 }
         |       cce_expr %prec NAME
                 {
-                  auto r = new_reference(new_literal(string_of($1)));
+                  const char *s = $1.s? $1.s : string_of($1.r);
+                  auto r = new_reference(new_literal(@1, s));
                   $$ = new cbl_ffi_arg_t(by_value_e, r);
                 }
         |       ADDRESS OF scalar
@@ -10405,7 +10510,7 @@ str_delimited:  str_input DELIMITED by str_size
                 ;
 
 str_input:      scalar
-        |       LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+        |       LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
         |       reserved_value
                 {
                   $$ = new_reference(constant_of(constant_index($1)));
@@ -10414,7 +10519,7 @@ str_input:      scalar
                 ;
 
 str_size:       SIZE   { $$ = new_reference(NULL); }
-        |       LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+        |       LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
         |       scalar
         |       reserved_value
                 {
@@ -10520,7 +10625,7 @@ unstring_src:   scalar
         |       intrinsic_call
         |       LITERAL
                 {
-                  $$ = new_reference(new_literal($1, quoted_e));
+                  $$ = new_reference(new_literal(@1, $1, quoted_e));
                 }
                 ;
 
@@ -10656,6 +10761,7 @@ function_udf:   FUNCTION_UDF '(' arg_list[args] ')' {
                   // Pretend hex-encoded because that means use verbatim.
                   auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
                   auto name = new_literal(strlen(L->name), L->name, attr);
+                  symbol_temporary_location(name, @1);
                  ast_call( @1, name, $$, args.size(), args.data(), NULL, NULL, true );
                 }
         |       FUNCTION_UDF_0 {
@@ -10670,6 +10776,7 @@ function_udf:   FUNCTION_UDF '(' arg_list[args] ')' {
                   // Pretend hex-encoded because that means use verbatim.
                   auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
                   auto name = new_literal(strlen(L->name), L->name, attr);
+                  symbol_temporary_location(name, @1);
                   ast_call( @1, name, $$, narg, args, NULL, NULL, true );
                 }
                 ;
@@ -10749,12 +10856,54 @@ intrinsic:      function_udf
                   $$ = new_alphanumeric(1,"CHAR");
                   if( ! intrinsic_call_1($$, CHAR, $r1, @r1)) YYERROR;
                 }
-
-       |       CONVERT  '(' varg[r1] convert_src[src] convert_dst[dst] ')' {
+                /* convert formulations: 
+                 *  1. ANY to ALNUM HEX, or NAT HEX
+                 *  2. HEX to BYTE
+                 *  3. ALNUM to NAT, ALNUM HEX, or NAT HEX
+                 *  4. NAT to ALNUM, ALNUM HEX, or NAT HEX
+                 */
+       |       CONVERT  '(' varg[r1] ANY convert_alpha[dst] convert_hex[hex]')' {
+                  location_set(@1);
+                  $$ = new_alphanumeric("CONVERT");
+                  unsigned int dst = ($dst | $hex );
+                  parser_intrinsic_convert($$, *$r1, convert_any_e, dst);
+                }
+       |       CONVERT  '(' varg[r1] ANY convert_nat[dst] convert_hex[hex]')' {
+                  location_set(@1);
+                  $$ = new_alphanumeric("CONVERT", current_encoding('N'));
+                  unsigned int dst = ($dst | $hex );
+                  parser_intrinsic_convert($$, *$r1, convert_any_e, dst);
+                }
+       |       CONVERT  '(' varg[r1] HEX BYTE ')' {
+                  location_set(@1);
+                  $$ = new_alphanumeric("CONVERT");
+                  parser_intrinsic_convert($$, *$r1, convert_hex_e, convert_byte_e);
+                }
+       |       CONVERT  '(' varg[r1] convert_alpha[src] convert_nat[dst] ')' {
+                  location_set(@1);
+                  $$ = new_alphanumeric("CONVERT", current_encoding('N'));
+                  auto src = convert_type_t($src);
+                  parser_intrinsic_convert($$, *$r1, src, $dst);
+                }
+       |       CONVERT  '(' varg[r1] convert_alpha[src] convert_alpha[dst] convert_hex[hex] ')' {
+                  location_set(@1);
+                  $$ = new_alphanumeric("CONVERT");
+                  auto src = convert_type_t($src);
+                  unsigned int dst = ($dst | $hex );
+                  parser_intrinsic_convert($$, *$r1, src, dst);
+                }
+       |       CONVERT  '(' varg[r1] convert_nat[src] convert_alpha[dst] ')' {
+                  location_set(@1);
+                  $$ = new_alphanumeric("CONVERT");
+                  auto src = convert_type_t($src);
+                  parser_intrinsic_convert($$, *$r1, src, $dst);
+                }
+       |       CONVERT  '(' varg[r1] convert_nat[src] convert_nat[dst] convert_hex[hex] ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric(1,"CONVERT");
-                 cbl_unimplemented("CONVERT");
-                  /* if( ! intrinsic_call_3($$, CONVERT, $r1, $src, $dst) ) YYERROR; */
+                  $$ = new_alphanumeric("CONVERT", current_encoding('N'));
+                  auto src = convert_type_t($src);
+                  unsigned int dst = ($dst | $hex );
+                  parser_intrinsic_convert($$, *$r1, src, dst);
                 }
 
         |       DISPLAY_OF  '(' varg[r1]  ')' {
@@ -10774,18 +10923,23 @@ intrinsic:      function_udf
                   parser_exception_file( $$, $filename );
                 }
 
-        |       FIND_STRING '(' varg[r1] last start_after anycase ')' {
+                /* FIND-STRING argument-1 argument-2 
+                 * [LAST] [[START AFTER] argument-3] [ANYCASE] */
+        |       FIND_STRING '(' varg[r1] varg[r2] last start_after[after] anycase ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric("FIND-STRING");
-                  /* auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); */
-                 cbl_unimplemented("%<FIND_STRING%>");
-                  /* if( ! intrinsic_call_4($$, FIND_STRING, r1, $r2) ) YYERROR; */
+                  $$ = new_tempnumeric("FIND-STRING");
+                  if( $after && ! is_numeric($after->field) ) {
+                    error_msg(@after, "START AFTER %qs must be numeric", $after->name());
+                    YYERROR;
+                  }
+                  parser_intrinsic_find_string($$, *$r1, *$r2, $after, $last, $anycase);
                 }
 
         |       FORMATTED_DATE '(' DATE_FMT[r1] expr[r2] ')' {
                   location_set(@1);
                   $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATE, "FORMATTED-DATE");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, FORMATTED_DATE, r1, $r2) ) YYERROR;
                 }
 
@@ -10795,6 +10949,7 @@ intrinsic:      function_udf
                   location_set(@1);
                   $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-DATETIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   static cbl_refer_t r3(literally_zero);
                   if( ! intrinsic_call_4($$, FORMATTED_DATETIME,
                                          r1, $r2, $r3, &r3) ) YYERROR;
@@ -10804,6 +10959,7 @@ intrinsic:      function_udf
                   location_set(@1);
                   $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-DATETIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_4($$, FORMATTED_DATETIME,
                                              r1, $r2, $r3, $r4) ) YYERROR;
                 }
@@ -10815,14 +10971,16 @@ intrinsic:      function_udf
                   location_set(@1);
                   $$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME, "FORMATTED-DATETIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_3($$, FORMATTED_TIME,
                                              r1, $r2, $r3) ) YYERROR;
                 }
         |       FORMATTED_TIME '(' TIME_FMT[r1] expr[r2]  ')' {
                   location_set(@1);
                   $$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME, "FORMATTED-TIME");
-                  auto r3 = new_reference(new_literal("0"));
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  auto r3 = new_reference(new_constant("0"));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_3($$, FORMATTED_TIME,
                                              r1, $r2, r3) ) YYERROR;
                 }
@@ -10830,13 +10988,15 @@ intrinsic:      function_udf
                   location_set(@1);
                   $$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-CURRENT_DATE");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_1($$, FORMATTED_CURRENT_DATE, r1, @r1) )
                                          YYERROR;
                 }
         |       TEST_FORMATTED_DATETIME '(' DATE_FMT[r1] varg[r2] ')' {
-                location_set(@1);
-               $$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
+                  location_set(@1);
+                 $$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10844,6 +11004,7 @@ intrinsic:      function_udf
                 location_set(@1);
                $$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10852,6 +11013,7 @@ intrinsic:      function_udf
                 location_set(@1);
                $$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10859,6 +11021,7 @@ intrinsic:      function_udf
                 location_set(@1);
                $$ = new_tempnumeric("INTEGER-OF-FORMATTED-DATE");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, INTEGER_OF_FORMATTED_DATE,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10867,6 +11030,7 @@ intrinsic:      function_udf
                 location_set(@1);
                $$ = new_tempnumeric("INTEGER-OF-FORMATTED-DATE");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, INTEGER_OF_FORMATTED_DATE,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10874,6 +11038,7 @@ intrinsic:      function_udf
                 location_set(@1);
                $$ = new_tempnumeric("SECONDS-FROM-FORMATTED-TIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, SECONDS_FROM_FORMATTED_TIME,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10882,6 +11047,7 @@ intrinsic:      function_udf
                 location_set(@1);
                $$ = new_tempnumeric("SECONDS-FROM-FORMATTED-TIME");
                   auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+                  symbol_temporary_location(r1->field, @r1);
                   if( ! intrinsic_call_2($$, SECONDS_FROM_FORMATTED_TIME,
                                               r1, $r2) ) YYERROR;
                 }
@@ -10900,12 +11066,12 @@ intrinsic:      function_udf
        |       LENGTH '(' varg1a[val] ')' {
                   location_set(@1);
                   $$ = new_tempnumeric("LENGTH", none_e);
-                 parser_set_numeric($$, $val->field->data.capacity);
+                 parser_set_numeric($$, $val->field->data.capacity());
                  if( ! intrinsic_call_1($$, LENGTH, $val, @val)) YYERROR;
                }
         |       lopper_case[func] '(' alpha_val[r1] ')' {
                   location_set(@1);
-                  $$ = new_alphanumeric($r1->field->data.capacity, "lopper_case[func]");
+                  $$ = new_alphanumeric($r1->field->data.capacity(), "lopper_case[func]");
                   if( ! intrinsic_call_1($$, $func, $r1, @r1)) YYERROR;
                 }
 
@@ -11069,8 +11235,8 @@ intrinsic:      function_udf
                 {
                   location_set(@1);
                   static auto r2 = new_reference(FldNumericDisplay, "50");
-                  static auto one = new cbl_refer_t( new_literal("1") );
-                  static auto four = new cbl_refer_t( new_literal("4") );
+                  static auto one = new cbl_refer_t( new_constant("1") );
+                  static auto four = new cbl_refer_t( new_constant("4") );
                   cbl_span_t year(one, four);
                   auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
@@ -11085,8 +11251,8 @@ intrinsic:      function_udf
         |       DATE_TO_YYYYMMDD '(' expr[r1] expr[r2] ')'
                 {
                   location_set(@1);
-                  static auto one = new cbl_refer_t( new_literal("1") );
-                  static auto four = new cbl_refer_t( new_literal("4") );
+                  static auto one = new cbl_refer_t( new_constant("1") );
+                  static auto four = new cbl_refer_t( new_constant("4") );
                   cbl_span_t year(one, four);
                   auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
@@ -11111,8 +11277,8 @@ intrinsic:      function_udf
                 {
                   location_set(@1);
                   static auto r2 = new_reference(FldNumericDisplay, "50");
-                  static auto one = new cbl_refer_t( new_literal("1") );
-                  static auto four = new cbl_refer_t( new_literal("4") );
+                  static auto one = new cbl_refer_t( new_constant("1") );
+                  static auto four = new cbl_refer_t( new_constant("4") );
                   cbl_span_t year(one, four);
                   auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
@@ -11127,8 +11293,8 @@ intrinsic:      function_udf
         |       DAY_TO_YYYYDDD '(' expr[r1] expr[r2] ')'
                 {
                   location_set(@1);
-                  static auto one = new cbl_refer_t( new_literal("1") );
-                  static auto four = new cbl_refer_t( new_literal("4") );
+                  static auto one = new cbl_refer_t( new_constant("1") );
+                  static auto four = new cbl_refer_t( new_constant("4") );
                   cbl_span_t year(one, four);
                   auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
@@ -11152,9 +11318,9 @@ intrinsic:      function_udf
         |       YEAR_TO_YYYY '(' expr[r1] ')'
                 {
                   location_set(@1);
-                  static auto r2 = new_reference(new_literal("50", decimal_e));
-                  static auto one = new cbl_refer_t( new_literal("1") );
-                  static auto four = new cbl_refer_t( new_literal("4") );
+                  static auto r2 = new_reference(new_constant("50"));
+                  static auto one = new cbl_refer_t( new_constant("1") );
+                  static auto four = new cbl_refer_t( new_constant("4") );
                   cbl_span_t year(one, four);
                   auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
@@ -11169,8 +11335,8 @@ intrinsic:      function_udf
         |       YEAR_TO_YYYY '(' expr[r1] expr[r2] ')'
                 {
                   location_set(@1);
-                  static auto one = new cbl_refer_t( new_literal("1") );
-                  static auto four = new cbl_refer_t( new_literal("4") );
+                  static auto one = new cbl_refer_t( new_constant("1") );
+                  static auto four = new cbl_refer_t( new_constant("4") );
                   cbl_span_t year(one, four);
                   auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
                   r3->refmod = year;
@@ -11225,18 +11391,19 @@ module_type:  ACTIVATING { $$ = module_activating_e; }
        |       TOP_LEVEL  { $$ = module_toplevel_e; }
                ;
 
-convert_src:   ANY
-       |       HEX
-       |       convert_fmt
+//convert_fmt: convert_alpha
+//        |       convert_nat
+//                ;
+convert_alpha: ALPHANUMERIC { $$ = convert_alpha_e; }
+       |       ANUM         { $$ = convert_alpha_e; }
                ;
-convert_dst:   convert_fmt HEX
-       |       BYTE
-               ;
-convert_fmt:   ALPHANUMERIC
-       |       ANUM
-       |       NAT
-       |       NATIONAL
+convert_nat:   NAT          { $$ = convert_nat_e; }
+       |       NATIONAL     { $$ = convert_nat_e; }
                ;
+convert_hex:    HEX                 { $$ = convert_hex_e; }
+        |       HEX JUSTIFIED       { $$ = convert_just_e; }
+        |       HEX JUSTIFIED RIGHT { $$ = convert_rjust_e; }
+        ;
 
 numval_locale:  %empty {
                   $$.is_locale = false;
@@ -11280,7 +11447,7 @@ intrinsic_locale:
                 {
                   location_set(@1);
                   $$ = new_alphanumeric();
-                  cbl_refer_t locale(new_literal($locale_name));
+                  cbl_refer_t locale(new_literal(@locale_name, $locale_name));
                   if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &locale) ) YYERROR;
                 }
 
@@ -11329,9 +11496,9 @@ lopper_case:    LOWER_CASE      { $$ = LOWER_CASE; }
         |       UPPER_CASE      { $$ = UPPER_CASE; }
                 ;
 
-trim_trailing:  %empty          { $$ = new_literal("0"); }  // Remove both
-        |       LEADING         { $$ = new_literal("1"); }  // Remove leading  spaces
-        |       TRAILING        { $$ = new_literal("2"); }  // Remove trailing spaces
+trim_trailing:  %empty          { $$ = new_constant("0"); }  // Remove both
+        |       LEADING         { $$ = new_constant("1"); }  // Remove leading  spaces
+        |       TRAILING        { $$ = new_constant("2"); }  // Remove trailing spaces
         ;
 
 intrinsic0:     CURRENT_DATE {
@@ -11396,7 +11563,7 @@ intrinsic0:     CURRENT_DATE {
                   location_set(@1);
                  // Returns YYYYMMDDhhmmssss-0500)
                   $$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE, "WHEN-COMPILED"); 
-                 parser_intrinsic_call_0( $$, "__gg__when_compiled" );
+                  parser_intrinsic_call_0( $$, "__gg__when_compiled" );
                 }
                 ;
 
@@ -11558,8 +11725,8 @@ key:            %empty
         |       KEY
                 ;
 
-last:          %empty %prec LAST
-       |       LAST
+last:          %empty { $$ = false; } %prec LAST
+       |       LAST   { $$ = true;  }
                ;
 
 lines:          %empty
@@ -11612,9 +11779,13 @@ is_signed:     %empty      { $$ = true; }
        |       UNSIGNED_kw { $$ = false; }
                ;
 
-start_after:   %empty %prec AFTER
-       |       START AFTER varg
+start_after:   %empty { $$ = nullptr; } %prec AFTER
+       |       START AFTER start_pos { $$ = $start_pos; }
+       |       start_pos             { $$ = $start_pos; }
                ;
+start_pos:      num_constant { $$ = new_reference($1); }
+        |       scalar
+                ;
 
 status:         %empty
         |       STATUS
@@ -11835,19 +12006,22 @@ xmlgen_body:    XMLGENERATE name[id1] FROM name[id2]
                 xmlgen_nameof xmlgen_typeof xmlgen_suppress
                 ;
 
-xmlgen_count:   %empty
-        |       COUNT in name[id3]
+xmlgen_count:   %empty { $$ = nullptr; }
+        |       COUNT in name[id3] { $$ = $id3; }
                 ;
-xmlgen_decl:    %empty
-        |       with XML_DECLARATION with ATTRIBUTES
+xmlgen_decl:    %empty { $$ = {}; }
+        |       with XML_DECLARATION with ATTRIBUTES { $$ = {true, true}; }
+        |       with XML_DECLARATION                 { $$ = {true, false}; }
+        |                            with ATTRIBUTES { $$ = {false, true}; }
                 ;
 xmlgen_namespace:
-                %empty
+                %empty 
         |       NAMESPACE is name[id4] namespace_prefix
                 ;
 namespace_prefix:
-                %empty
-        |       NAMESPACE_PREFIX is namestr[id5]
+                %empty 
+        |       NAMESPACE_PREFIX is name[id5] 
+        |       NAMESPACE_PREFIX is LITERAL[id5] 
                 ;
 xmlgen_nameof:  %empty
         |       NAME of xmlgen_ids
@@ -13089,7 +13263,7 @@ numstr2i( const char input[], radix_t radix ) {
 }
 
 static inline cbl_field_t *
-new_literal( const char initial[], enum radix_t radix ) {
+new_literal( const cbl_loc_t loc, const char initial[], enum radix_t radix ) {
   auto attr = constant_e;
 
   switch( radix ) {
@@ -13102,8 +13276,10 @@ new_literal( const char initial[], enum radix_t radix ) {
     attr = bool_encoded_e;
     break;
   }
-  return new_literal(strlen(initial), initial,
-                     cbl_field_attr_t(constant_e | attr));
+  auto field =  new_literal(strlen(initial), initial,
+                            cbl_field_attr_t(constant_e | attr));
+  symbol_temporary_location(field, loc);
+  return field;
 }
 
 class is_elementary_type { // for INITIALIZE purposes
@@ -13170,8 +13346,11 @@ struct expand_group : public std::list<cbl_refer_t> {
 };
 
 
-static const uint32_t * initial_default_value;
-       const uint32_t * wsclear() { return initial_default_value; }
+static const uint32_t *initial_default_value;
+const uint32_t *
+wsclear() {
+  return initial_default_value;
+}
 
 void
 wsclear( uint32_t i ) {
@@ -13217,7 +13396,7 @@ initialize_one( cbl_num_result_t target, bool with_filler,
       char ach[5];
       int v = *s;
       sprintf(ach, "%d", v);
-      source.field = new_literal(ach);
+      source.field = new_constant(ach);
       source.addr_of = true;
     }
 
@@ -13236,9 +13415,9 @@ initialize_one( cbl_num_result_t target, bool with_filler,
   if( value_category == data_category_all ||
       value_category == data_category_of(tgt) ) {
     // apply any applicable VALUE
-    if( explicitly || tgt.field->data.initial ) {
+    if( explicitly || tgt.field->data.original() ) {
       assert( with_filler || !tgt.field->has_attr(filler_e) );
-      if( tgt.field->data.initial ) {
+      if( tgt.field->data.original() ) {
         parser_initialize(tgt);
       }
     }
@@ -13392,12 +13571,12 @@ initialize_statement( const cbl_num_result_t& target, bool with_filler,
                            size_t first, second;
                            first = second = group_offset(span.first);
                            if( ! span.second ) {
-                             second += std::max(span.first->data.capacity,
+                             second += std::max(span.first->data.capacity(),
                                                 span.first->data.memsize);
                            } else {
                              second = group_offset(span.second)
                                     - group_offset(span.first);
-                             second += std::max(span.second->data.capacity,
+                             second += std::max(span.second->data.capacity(),
                                                 span.second->data.memsize);
                            }
                            return std::make_pair(first, second);
@@ -13565,7 +13744,7 @@ void parser_add_declaratives( size_t n, cbl_declarative_t *declaratives) {
 }
 
 cbl_field_t *
-new_literal( const literal_t& lit, enum cbl_field_attr_t attr ) {
+new_literal( const cbl_loc_t loc, const literal_t& lit, enum cbl_field_attr_t attr ) {
   bool zstring = lit.prefix[0] == 'Z';
   if( !zstring && lit.data[lit.len] != '\0' ) {
     dbgmsg("%s:%d: line %d, no NUL terminator '%-*.*s'{"
@@ -13580,9 +13759,12 @@ new_literal( const literal_t& lit, enum cbl_field_attr_t attr ) {
   attrs |= constant_e;
   attrs |= literal_attr(lit.prefix);
 
-  return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs), lit.encoding);
+  auto field = new_literal(lit.len, lit.data, cbl_field_attr_t(attrs), lit.encoding);
+  symbol_temporary_location(field, loc);
+  return field;
 }
 
+
 bool
 cbl_file_t::validate_forward( size_t isym ) const {
   if( isym > 0 && FldForward == symbol_field_forward(isym)->type ) {
@@ -13690,7 +13872,7 @@ literal_t::set( const cbl_field_t * field ) {
   assert(is_literal(field));
 
   set_prefix( "", 0 );
-  set_data( field->data.capacity,
+  set_data( field->data.capacity(),
             const_cast<char*>(field->data.initial),
             field_index(field) );
   return *this;
@@ -13773,7 +13955,7 @@ const char *
 cbl_field_t::value_str() const {
     if( data.etc_type == cbl_field_data_t::value_e )
        return string_of( data.value_of() );
-    return "???";
+    return data.etc_type_str();
 }
 
 static const cbl_division_t not_syntax_only = cbl_division_t(-1);
@@ -13814,7 +13996,7 @@ cobol_dialect_set( cbl_dialect_t dialect ) {
 static bool internal_ebcdic_locked = false;
 
 void internal_ebcdic_lock() {
-  internal_ebcdic_locked = true;
+////  internal_ebcdic_locked = true;
 }
 void internal_ebcdic_unlock() {
   internal_ebcdic_locked = false;
@@ -13822,11 +14004,6 @@ void internal_ebcdic_unlock() {
 
 bool
 cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature, bool on ) {
-  if( gcobol_feature == feature_internal_ebcdic_e ) {
-    if( internal_ebcdic_locked ) return false;
-    if( ! on ) gcc_unreachable();
-    current.default_encoding.set(EBCDIC_e);
-  }
   if( on ) {
     cbl_gcobol_features |= gcobol_feature;
   } else {
@@ -13839,6 +14016,8 @@ static bool
 literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) {
   if( r.field->has_attr(any_length_e) ) return true;
 
+  unsigned int nchar = r.field->char_capacity();
+
   const cbl_span_t& refmod(r.refmod);
 
   if( ! is_literal(refmod.from->field) ) {
@@ -13846,7 +14025,7 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) {
     if( ! is_literal(refmod.len->field) ) return true;
     auto edge = refmod.len->field->as_integer();
     if( 0 < edge ) {
-      if( edge-1 < r.field->data.capacity ) return true;
+      if( edge-1 < nchar ) return true;
     }
     // len < 0 or not: 0 < from + len <= capacity
     error_msg(loc, "%s(%s:%zu) out of bounds, "
@@ -13854,19 +14033,19 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) {
              r.field->name,
              refmod.from->name(),
              size_t(edge),
-             static_cast<unsigned int>(r.field->data.capacity) );
+             nchar );
     return false;
   }
 
   auto edge = refmod.from->field->as_integer();
   if( edge > 0 ) {
-    if( --edge < r.field->data.capacity ) {
+    if( --edge < nchar ) {
       if( ! refmod.len ) return true;
       if( ! is_literal(refmod.len->field) ) return true;
       auto len = refmod.len->field->as_integer();
       if( len > 0 ) {
        edge += len;
-       if( --edge < r.field->data.capacity ) return true;
+       if( --edge < nchar ) return true;
       }
       // len < 0 or not: 0 < from + len <= capacity
       loc = symbol_field_location(field_index(r.field));
@@ -13875,7 +14054,7 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) {
                r.field->name,
                size_t(refmod.from->field->as_integer()),
                size_t(len),
-               static_cast<unsigned int>(r.field->data.capacity) );
+               nchar );
       return false;
     }
   }
@@ -13883,7 +14062,7 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) {
   error_msg(loc,"%s(%zu) out of bounds, size is %u",
            r.field->name,
            size_t(refmod.from->field->as_integer()),
-           static_cast<unsigned int>(r.field->data.capacity) );
+           nchar );
   return false;
 }
 
@@ -14104,3 +14283,91 @@ eval_subject_t::compare( const cbl_refer_t& object,
   return result;
 }
 
+/*
+ * Define a binary field according to USAGE: attr, type, and capacity. 
+ * Return type. 
+ * Do not set initial value; that is up to PICTURE and VALUE.
+ */
+static cbl_field_type_t
+field_binary_usage( YYLTYPE loc, cbl_field_t *field,
+                    cbl_field_type_t type, uint32_t capacity,
+                    bool signable )
+{
+  bool infer = true;
+  
+  // Some binary types have defined capacity;
+  switch(type) {
+    // COMPUTATIONAL and COMP-5 rely on PICTURE.
+  case FldNumericBinary:
+    field->attr |= big_endian_e;
+    __attribute__((fallthrough));
+  case FldNumericBin5:
+    // If no capacity yet, then no picture, infer $comp.capacity.
+    // If field has capacity, ensure USAGE is compatible.
+    if( field->data.capacity() > 0 ) { // PICTURE before USAGE
+      infer = false;
+      auto true_capacity = field->data.capacity();
+      
+      switch( field->type ) {
+      case FldAlphanumeric:   // PIC X COMP-5 or COMP-X
+        assert( field->data.digits == 0 );
+        assert( field->data.rdigits == 0 );
+        dialect_ok(loc, MfCompX, "alphanumeric PICTURE with numeric USAGE");
+        symbol_field_type_update(field, type, true);
+        field->clear_attr(signable_e);
+        // In case PIC X for UTF-16 (say), the X's represent digits.
+        true_capacity /= field->codeset.stride();
+        field->data.capacity(true_capacity);
+        break;
+      case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X
+        if( capacity == 0xFF ) { // comp-x is a bit like comp-5
+          assert( field->data.digits == field->data.capacity() );
+          dialect_ok(loc, MfCompX, "alphanumeric PICTURE with numeric USAGE");
+        }
+        symbol_field_type_update(field, type, true);
+        capacity = type_capacity(field->type, field->data.digits);
+        field->data.capacity(capacity);
+        field->blank_initial(capacity / field->codeset.stride());
+        break;
+      case FldInvalid:
+        symbol_field_type_update(field, type, true);
+        field->data.capacity(capacity);
+        if( signable ) field->set_attr(signable_e);
+        gcc_assert(field->type == FldNumericBin5);
+        break;
+      default:
+        break;
+      }
+    }
+    break;
+  case FldPacked: // comp-6 is unsigned comp-3
+    assert(! signable);  // else PACKED_DECIMAL from scanner
+    field->set_attr(separate_e);
+    dialect_ok(loc, MfComp6, "COMP-6");
+    if( symbol_field_type_update(field, type, true) ) {
+      infer = false;
+      field->set_capacity(type_capacity(field->type, field->data.digits));
+    } else {
+      error_msg(loc, "USAGE PACKED DECIMAL conflicts with PICTURE");
+    }
+    break;
+  default:
+    break;
+  }
+
+  if( infer ) {
+    symbol_field_type_update(field, type, true);
+    if( capacity > 0 ) {
+      if( field->data.capacity() > 0 ) {
+        error_msg(loc, "%s is BINARY type, incompatible with PICTURE",
+                  field->name);
+      }
+      if( signable ) {
+        field->set_attr(signable_e);
+      }
+      field->set_capacity(capacity);
+    }
+  }
+
+  return field->type;
+}
index 54d1f9a358f9c21edfa7c8e70cc75b5fccec2885..12d944f3ab7a4502cdf644729a6abc254b7e9656 100644 (file)
 #include <stack>
 #include <string>
 
-#define MAXLENGTH_FORMATTED_DATE     10
-#define MAXLENGTH_FORMATTED_TIME     19
-#define MAXLENGTH_CALENDAR_DATE      21
-#define MAXLENGTH_FORMATTED_DATETIME 30
+#define MAXLENGTH_FORMATTED_DATE     (10*4)
+#define MAXLENGTH_FORMATTED_TIME     (19*4)
+#define MAXLENGTH_CALENDAR_DATE      (21*4)
+#define MAXLENGTH_FORMATTED_DATETIME (30*4)
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -116,26 +116,15 @@ extern int yydebug;
 static const std::set<std::string> compat_programs {
   "CBL_ALLOC_MEM", 
   "CBL_CHECK_FILE_EXIST", 
+  "CBL_CLOSE_FILE",
   "CBL_DELETE_FILE", 
   "CBL_FREE_MEM", 
+  "CBL_GET_PROGRAM_INFO",
+  "CBL_OPEN_FILE",
+  "CBL_READ_FILE",
+  "CBL_WRITE_FILE",
 };
 
-const char *
-consistent_encoding_check( const YYLTYPE& loc, const char input[] ) {
-  cbl_field_t faux = {};
-  faux.type = FldAlphanumeric;
-  faux.data.capacity = capacity_cast(strlen(input));
-  faux.data.initial = input;
-
-  auto s = faux.internalize();
-  if( !s ) {
-    error_msg(loc, "inconsistent string literal encoding for '%s'", input);
-  } else {
-    if( s != input ) return s;
-  }
-  return NULL;
-}
-
 const char * original_picture();
       char * original_number( char input[] = NULL );
 
@@ -184,6 +173,8 @@ enum data_clause_t {
   typedef_clause_e     = 0x8000,
 };
 
+static std::map<data_clause_t,cbl_loc_t> data_clause_locations;
+
 static inline bool
 has_clause( int data_clauses, data_clause_t clause ) {
   return clause == (data_clauses & clause);
@@ -235,11 +226,12 @@ namcpy(const YYLTYPE& loc, cbl_name_t tgt, const char *src ) {
 
 cbl_field_t *
 new_alphanumeric( size_t capacity = MAXIMUM_ALPHA_LENGTH,
-                 const cbl_name_t name = nullptr );
+                 const cbl_name_t name = nullptr,
+                  cbl_encoding_t encoding = no_encoding_e );
 
 static inline cbl_field_t *
-new_alphanumeric( const cbl_name_t name ) {
-  return new_alphanumeric(MAXIMUM_ALPHA_LENGTH, name);
+new_alphanumeric( const cbl_name_t name, cbl_encoding_t encoding = no_encoding_e ) {
+  return new_alphanumeric(MAXIMUM_ALPHA_LENGTH, name, encoding);
 }
 
 static inline cbl_refer_t *
@@ -281,11 +273,10 @@ static inline char * dequote( char input[] ) {
 static const char *
 name_of( cbl_field_t *field ) {
   assert(field);
-  if( field->data.initial == nullptr ) {
-    return field->name;
+  if( field->name[0] == '_' && field->data.initial ) {
+    return field->data.original()? field->data.original() : field->data.initial;
   }
-  return field->name[0] == '_' && field->data.initial?
-    field->data.original() : field->name;
+  return field->name;
 }
 
 static const char *
@@ -712,7 +703,7 @@ class eval_subject_t {
   bool decide( relop_t op, const cbl_refer_t& object, bool invert ) {
     if( pcol == columns.end() ) return false;
     dbgmsg("%s() if not %s goto %s", __func__, result->name, when()->name);
-    
+
     if( compare(op, object, true) ) {
       if( invert ) {
         parser_logop( result, NULL, not_op, result );
@@ -915,8 +906,26 @@ list_add( list<cbl_num_result_t>& list, const cbl_refer_t& refer, int round ) {
   list.push_back(arg);
 }
 
-static  list<cbl_domain_t> domains;
-typedef list<cbl_domain_t>::iterator domain_iter;
+struct domain_t : public cbl_domain_t {
+  cbl_encoding_t encoding;
+  domain_t( cbl_encoding_t encoding, const cbl_domain_t& domain )
+    : cbl_domain_t(domain)
+    , encoding(encoding)
+  {}
+  explicit domain_t( const cbl_domain_t& domain )
+    : cbl_domain_t(domain)
+    , encoding( current_encoding('A') )
+  {}
+  bool encoding_ok( cbl_encoding_t enc ) const {
+    return enc == encoding
+      ||   enc == no_encoding_e
+      ||   encoding == no_encoding_e
+      ||   first.is_numeric
+      ||    last.is_numeric;
+  }
+};
+
+static  list<domain_t> domains;
 
 /*
  * The name queue is a queue of lists of data-item names recognized by the
@@ -965,8 +974,9 @@ struct file_list_t {
     std::copy( that.files.begin(), that.files.end(), files.begin() );
   }
 
-  static size_t symbol_index( cbl_file_t* file ) {
-    return ::symbol_index( symbol_elem_of(file) );
+  static uint64_t symbol_index( cbl_file_t* file ) {
+    uint64_t retval = symbol_unique_index(symbol_elem_of(file));
+    return retval;
   }
 };
 
@@ -1325,10 +1335,12 @@ public:
   cbl_label_t *declaratives_eval, *paragraph, *section;
   const char *collating_sequence;
   struct encoding_t {
+    friend bool cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature,
+                                          bool on );
     struct encoding_base_t {
       size_t isym;
       cbl_encoding_t encoding;
-      encoding_base_t() : isym(0), encoding(CP1252_e) {}
+      encoding_base_t() : isym(0), encoding(custom_encoding_e) {}
       encoding_base_t(cbl_encoding_t encoding) : isym(0), encoding(encoding) {}
       void set( size_t isym, cbl_encoding_t encoding ) {
         this->isym = isym;
@@ -1339,18 +1351,43 @@ public:
         this->isym = 0;
         this->encoding = encoding;
       }
-
     } alpha, national;
-    encoding_t() : national(EBCDIC_e) {}
+
+    encoding_t() : alpha(alpha_default()), national(national_default()) {}
+
+    bool sizes_ok() const {
+      charmap_t * alp = __gg__get_charmap(alpha.encoding);
+      charmap_t * nat = __gg__get_charmap(national.encoding);
+      return alp->stride() <= nat->stride();
+    }
+
+  protected:
+    /*
+     * Use static default encodings
+     */
+    static cbl_encoding_t alpha_default() {
+      return cbl_field_t::codeset_t::default_encodings.alpha.type;
+    }
+    static cbl_encoding_t national_default() {
+      return cbl_field_t::codeset_t::default_encodings.national.type;
+    }
+
+    // Set static default alpha encoding.
+    // Called only by above friend function in support of ebcdic.
+    static void alpha_default( cbl_encoding_t encoding) {
+      auto alpha = __gg__encoding_iconv_descr(encoding);
+      gcc_assert(alpha);
+      cbl_field_t::codeset_t::default_encodings.alpha = *alpha;
+    }
   } alphabet;
-  
+
   bool locale_add( const cbl_locale_t& locale ) {
     auto e = symbol_locale_add(program_index, &locale);
     assert(e);
     auto p = locales.insert(locale);
     return p.second;
   }
-      
+
   cbl_options_t options;
 
   explicit prog_descr_t( size_t isymbol )
@@ -1629,8 +1666,6 @@ static class current_t {
   rel_part_t antecedent_cache;
 
  public:
-  static prog_descr_t::encoding_t::encoding_base_t default_encoding;
-
   current_t()
     : first_statement(0)
     , in_declaratives(false)
@@ -1726,7 +1761,7 @@ static class current_t {
       tree ena, dcl;
       runtime_t() : ena(nullptr), dcl(nullptr) {}
     } runtime;
-    
+
     bool empty() const {
       return declaratives_list_t::empty();
     }
@@ -1774,7 +1809,7 @@ static class current_t {
                           } );
     }
 
-    std::vector<uint64_t> 
+    std::vector<uint64_t>
     encode() const {
       std::vector<uint64_t> encoded;
       auto p = std::back_inserter(encoded);
@@ -1919,6 +1954,11 @@ static class current_t {
     return programs.top().locale_add(locale);
   }
 
+  static inline const char *
+  cbl_encoding_str( cbl_encoding_t encoding ) {
+    return __gg__encoding_iconv_name(encoding);
+  }
+
   bool new_program ( const YYLTYPE& loc, cbl_label_type_t type,
                      const char name[], const char os_name[],
                      bool common, bool initial )
@@ -1936,14 +1976,20 @@ static class current_t {
     const cbl_label_t *L;
     if( (L = symbol_program_add(parent, &label)) == NULL ) return false;
     prog_descr_t program(symbol_index(symbol_elem_of(L)));
-#if 1 //EBCDIC  // enable when ready
-      auto alpha_encoding =
-        programs.empty()? default_encoding : programs.top().alphabet.alpha;
-      if( alpha_encoding.encoding == EBCDIC_e ) {
-        dbgmsg("%s:%d: We're in EBCDIC", __func__, __LINE__);
-      }
-      program.alphabet.alpha = alpha_encoding;
-#endif
+    auto encoding = current_encoding('A');
+    if( encoding == EBCDIC_e ) {
+      dbgmsg("%s:%d: We're in EBCDIC", __func__, __LINE__);
+    }
+    program.alphabet.alpha = encoding;
+    program.alphabet.national = current_encoding('N');
+
+    if( ! program.alphabet.sizes_ok() ) {
+      error_msg(loc, "Alphanumeric encoding %qs "
+                "cannot be wider than National encoding %qs",
+                cbl_encoding_str(encoding),
+                cbl_encoding_str(program.alphabet.national.encoding));
+    }
+
     programs.push( program );
     programs.apply_pending();
 
@@ -2101,7 +2147,7 @@ static class current_t {
     // assembly language.
     static int eval_count = 1;
     char eval[32], lave[32];
-    
+
     sprintf(eval, "_DECLARATIVES_EVAL%d", eval_count);
     sprintf(lave, "_DECLARATIVES_LAVE%d", eval_count++);
 
@@ -2112,9 +2158,9 @@ static class current_t {
     ast_enter_section(eval_label);
 
     declarative_runtime_match(declaratives.as_list(), lave_label);
-    
+
     parser_label_label(lave_label);
-    
+
     return lave_label;
   }
 
@@ -2122,11 +2168,11 @@ static class current_t {
     std::swap( programs.top().section, section );
     return section;
   }
-  
+
   ec_type_t ec_type_of( file_status_t status ) {
     static std::vector<ec_type_t> ec_by_status {
       /* 0 */ ec_none_e, // ec_io_warning_e if low byte is nonzero
-      /* 1 */ ec_io_at_end_e, 
+      /* 1 */ ec_io_at_end_e,
       /* 2 */ ec_io_invalid_key_e,
       /* 3 */ ec_io_permanent_error_e,
       /* 4 */ ec_io_logic_error_e,
@@ -2186,7 +2232,7 @@ static class current_t {
    * To indicate to the runtime-match function that we want to evaluate
    * only the exception condition, unrelated to a file, we set the
    * file register to 0 and the handled-exception register to the
-   * handled exception condition. 
+   * handled exception condition.
    *
    * declaratives_execute performs the "declarative ladder" produced
    * by declaratives_runtime_match.  That section CALLs the
@@ -2242,8 +2288,6 @@ static class current_t {
   cbl_label_t * compute_label() { return error_labels.compute_error; }
 } current;
 
-prog_descr_t::encoding_t::encoding_base_t current_t::default_encoding;
-
 void current_enabled_ecs( tree ena ) {
   current.declaratives.runtime.ena = ena;
 }
@@ -2266,19 +2310,23 @@ current_options() {
 
 cbl_encoding_t
 current_encoding( char a_or_n ) {
-  cbl_encoding_t retval;
+  cbl_encoding_t encoding;
   switch(a_or_n) {
   case 'A':
-    retval = current.alpha_encoding();
+    encoding = cbl_field_t::codeset_t::default_encodings.alpha.type;
+    if( current.program() )
+      encoding = current.alpha_encoding();
     break;
   case 'N':
-    retval = current.national_encoding();
+    encoding = cbl_field_t::codeset_t::default_encodings.national.type;
+    if( current.program() )
+      encoding = current.national_encoding();
     break;
   default:
     gcc_unreachable();
     break;
   }
-  return retval;
+  return encoding;
 }
 
 size_t
@@ -2340,12 +2388,12 @@ normalize_picture( char picture[] );
 
 static inline cbl_field_t *
 new_tempnumeric(const cbl_name_t name = nullptr, cbl_field_attr_t attr = signable_e ) {
-  return new_temporary(FldNumericBin5, name, attr == signable_e);
+  return new_temporary(FldNumericBin5, name, attr);
 }
 
 static inline cbl_field_t *
 new_tempnumeric(const cbl_field_attr_t attr ) {
-  return new_temporary(FldNumericBin5, nullptr, attr == signable_e);
+  return new_temporary(FldNumericBin5, nullptr, attr);
 }
 
 static inline cbl_field_t *
@@ -2370,13 +2418,7 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r );
 static bool
 is_integer_literal( const cbl_field_t *field ) {
   if( field->type == FldLiteralN ) {
-    size_t nchar;
-    const char *initial = __gg__iconverter(field->codeset.encoding,
-                                           DEFAULT_SOURCE_ENCODING,
-                                           field->data.initial,
-                                           strlen(field->data.initial),
-                                           &nchar);
-    assert(strlen(initial) == nchar);
+    const char *initial = field->data.original();
     switch( *initial ) {
     case '-': case '+': ++initial;
     }
@@ -2384,7 +2426,7 @@ is_integer_literal( const cbl_field_t *field ) {
     const char *eos = initial + strlen(initial);
     auto p = std::find_if_not( initial, eos, fisdigit );
     if( p == eos ) return true;
-    
+
     if( *p++ == symbol_decimal_point() ) {
       switch( *p++ ) {
       case 'E': case 'e':
@@ -2563,9 +2605,15 @@ intrinsic_call_4( cbl_field_t *tgt, int token,
  */
 
 static inline cbl_field_t *
-new_literal( const char initial[] ) {
+new_constant( const char initial[] ) {
   return new_literal( strlen(initial), initial );
 }
+static inline cbl_field_t *
+new_literal( const cbl_loc_t loc, const char initial[] ) {
+  auto field = new_constant(initial);
+  symbol_temporary_location(field, loc);
+  return field;
+}
 
 cbl_refer_t *
 negate( cbl_refer_t * refer, bool neg = true ) {
@@ -2578,7 +2626,7 @@ negate( cbl_refer_t * refer, bool neg = true ) {
 
 cbl_field_t *
 conditional_set( cbl_field_t *tgt, bool tf ) {
-  static cbl_field_t *one = new_literal("1");
+  static cbl_field_t *one = new_constant("1");
 
   enum relop_t op = tf? eq_op : ne_op;
   parser_relop( tgt, one, op, one );
@@ -2615,7 +2663,7 @@ symbol_find( const std::list<const char *>& names ) {
 }
 
 static inline cbl_field_t *
-field_find( const std::list<const char *>& names ) {
+field_find( cbl_loc_t loc, const std::list<const char *>& names ) {
   if( names.size() == 1 ) {
     auto value = cdf_value(names.front());
     if( value ) {
@@ -2624,7 +2672,7 @@ field_find( const std::list<const char *>& names ) {
         field = new_tempnumeric();
         parser_set_numeric(field, value->as_number());
       } else {
-        field = new_literal(value->string);
+        field = new_literal(loc, value->string);
       }
       return field;
     }
@@ -2732,9 +2780,11 @@ valid_redefine( const YYLTYPE& loc,
         dbgmsg( "size error redef: %s", field_str(field) );
         error_msg(loc, "%s (%s size %u) larger than REDEFINES %s (%s size %u)",
                   field->name,
-                  3 + cbl_field_type_str(field->type), field->size(),
+                  3 + cbl_field_type_str(field->type),
+                  field->size()/field->codeset.stride(),
                   orig->name,
-                  3 + cbl_field_type_str(orig->type), orig->size() );
+                  3 + cbl_field_type_str(orig->type),
+                  orig->size()/field->codeset.stride() );
       }
     }
   }
@@ -2764,26 +2814,41 @@ valid_redefine( const YYLTYPE& loc,
   return true;
 }
 
+#if 0
 static void
 field_value_all(struct cbl_field_t * field ) {
   // Expand initial by repeating its contents until it is of length capacity:
   assert(field->data.initial != NULL);
   size_t initial_length = strlen(field->data.initial);
-  char *new_initial = static_cast<char*>(xmalloc(field->data.capacity + 1));
+  char *new_initial =
+          static_cast<char*>(xmalloc(field->data.capacity()/
+                                     field->codeset.stride() + 1));
   size_t i = 0;
-  while(i < field->data.capacity) {
+
+  while(i < field->data.capacity()/field->codeset.stride()) {
     new_initial[i] = field->data.initial[i%initial_length];
     i += 1;
   }
-  new_initial[field->data.capacity] = '\0';
+  new_initial[field->data.capacity()/field->codeset.stride()] = '\0';
   free(const_cast<char *>(field->data.initial));
   field->data.initial = new_initial;
 }
+#endif
+
+static cbl_field_t *
+parent_has_picture( cbl_field_t *field ) {
+  while( (field = parent_of(field)) != NULL ) {
+    if( symbol_redefines(field) ) return nullptr;
+    if( field->data.initial ) break; // initial create by PICTURE clause, usually
+  }
+  return field;
+}
 
 static cbl_field_t *
 parent_has_value( cbl_field_t *field ) {
   while( (field = parent_of(field)) != NULL ) {
-    if( field->data.initial ) break;
+    if( symbol_redefines(field) ) return nullptr;
+    if( field->data.original() ) break;
   }
   return field;
 }
@@ -2922,63 +2987,99 @@ field_capacity_error( const YYLTYPE& loc, const cbl_field_t *field ) {
   uint32_t parent_capacity = 0;
   if( field->parent ) {
     auto e = symbol_at(field->parent);
-    if( e->type == SymField ) parent_capacity = cbl_field_of(e)->data.capacity;
-  }
-  /*
-   * Field may become a table whose capacity was inherited from a parent with
-   * data. If so, the field's capacity will be overwritten by its
-   * PICTURE-defined size.
-   */
-  if( parent_capacity < field->data.capacity && !symbol_redefines(field) ) {
-    dbgmsg( "%s: %s", __func__, field_str(field) );
-    error_msg(loc,  "%s has USAGE incompatible with PICTURE",
-              field->name );
-    return true;
+    if( e->type == SymField ) parent_capacity = cbl_field_of(e)->data.capacity();
+    /*
+     * Field may become a table whose capacity was inherited from a parent with
+     * data. If so, the field's capacity will be overwritten by its
+     * PICTURE-defined size.
+     */
+    if( parent_capacity < field->data.capacity() && !symbol_redefines(field) ) {
+      dbgmsg( "%s: %s", __func__, field_str(field) );
+      error_msg(loc,  "%s has USAGE incompatible with PICTURE",
+                field->name );
+      return true;
+    }
   }
   return false;
 }
 #define ERROR_IF_CAPACITY(L, F)                                 \
   do { if( field_capacity_error(L, F) ) YYERROR; } while(0)
 
-static const char *
-blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) {
-  assert(capacity < new_size);
-  assert(initial != NULL);
-
-  if( normal_value_e != cbl_figconst_of(initial) ) return initial;
+template <typename T>
+static void
+blankit( T* beg, size_t n, T ch ) {
+  std::fill(beg, beg + n, ch);
+}
 
-  auto p = reinterpret_cast<char *>( xmalloc(2 + new_size) );
-  memset(p, 0x20, new_size);
-  memcpy(p, initial, capacity);
-  p[new_size] = '\0'; // for debugging
-  p[++new_size] = '\0'; // for debugging
-  return p;
+/*
+ * Normally blank_initial takes just a length argument and initializes
+ * data.initial to all blanks according to the field's encoding.  Optionally it
+ * applies a figurative constant and uses that instead. 
+ */
+void
+cbl_field_t::blank_initial( size_t nchar, cbl_figconst_t figconst ) {
+  charmap_t *charmap = __gg__get_charmap(codeset.encoding);
+  cbl_char_t space_char = figconst == normal_value_e?
+    charmap->mapped_character(ascii_space)
+  : charmap->figconst_character(figconst);
+  
+  size_t nbyte = nchar * codeset.stride();
+  char *init = static_cast<char *>(xmalloc(nbyte+4));
+  char *enit = init + nbyte;
+  std::fill(enit, enit + 4, '\0'); // append for NULs
+  
+  switch(codeset.stride()) {
+  case 1: 
+    blankit( reinterpret_cast<uint8_t*>(init), nchar, uint8_t(space_char) );
+    break;
+  case 2:
+    blankit( reinterpret_cast<uint16_t*>(init), nchar, uint16_t(space_char) );
+    break;
+  case 4:
+    blankit( reinterpret_cast<uint32_t*>(init), nchar, uint32_t(space_char) );
+    break;
+  default:
+    gcc_unreachable();
+  }
+  data.initial = init;
 }
 
 /*
- * When cbl_field_t::internalize is called, its data.initial value has been
- * set, but nothing has been done to it.  It is encoded according to the source
- * code.  internalize() converts data.initial to the field's encoding. 
- *
- * If syntax used was was PIC VALUE, in that order, then PIC set the field's
- * encoding, and the VALUE clause can verify that its encoding matches.  If the
- * order was VALUE PIC, the value leaves the encoding uninitialized unless the
- * value string bore an encoding prefix.  When PIC is processed, codeset_t::set
- * allows it to set the encoding only if it's either uninitialized, or the PIC
- * encoding matches the existing one set by VALUE.  In no event does one
- * override the other; they must agree.
- *
- * internalize() fails if data.initial cannot be converted to the field's
- * encoding.
+ * When called, data.nbyte, if nonzero, holds the length of data.orig.data.
+ * Set data.capacity to its correct size, and create data.initial as all
+ * blanks, based on that size.  Then encode the original string into
+ * data.initial, preserving any trailing blanks.
  */
-static void
-value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) {
-  if( ! field->internalize() ) {
-    error_msg(loc, "inconsistent string literal encoding for '%s'",
-              field->data.initial);
+void
+cbl_field_t::set_initial( size_t nchar, const cbl_loc_t& loc ) {
+  auto srclen = data.capacity(); 
+  set_capacity(nchar);
+  blank_initial( char_capacity() );
+  if( data.original() ) {
+    attr |= cbl_figconst_of(data.original());
+    if( has_attr(hex_encoded_e) ) {
+      // If initial value is too long, the caller should report it. 
+      auto len = std::min(srclen, data.capacity());
+      std::copy(data.original(), data.original() + len,
+                const_cast<char*>(data.initial));
+    } else {
+      if( 0 < data.capacity() ) {
+        encode(srclen, loc);
+      }
+    }
   }
 }
 
+/*
+ * When called without a length, set_initial determines the character count
+ * from the current size, established by the size of the VALUE string or
+ * literal.
+ */
+void
+cbl_field_t::set_initial( const cbl_loc_t& loc ) {
+  set_initial( data.capacity(), loc );
+}
+
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
 
@@ -2987,7 +3088,7 @@ field_alloc( const YYLTYPE& loc, cbl_field_type_t type, size_t parent, const cha
   static const uint32_t level = 0;
   cbl_field_t *f, field = { type, 0, cbl_field_data_t(), level, name, yylineno };
   field.parent = parent;
-  
+
   f = field_add(loc, &field);
   assert(f);
   return f;
@@ -3111,9 +3212,9 @@ parser_move_carefully( const char */*F*/, int /*L*/,
 
     if( is_index ) {
       if( tgt.field->type != FldIndex && src.field->type != FldIndex) {
-        error_msg(src.loc, "invalid SET %s (%s) TO %s (%s): not a field index",
-                  tgt.field->name, cbl_field_type_str(tgt.field->type),
-                  src.field->name, cbl_field_type_str(src.field->type));
+        error_msg(src.loc, "invalid SET %qs (%s) TO %qs (%s): not a field index",
+                  name_of(tgt.field), 3 + cbl_field_type_str(tgt.field->type),
+                  name_of(src.field), 3 + cbl_field_type_str(src.field->type));
         delete tgt_list;
         return false;
       }
@@ -3122,37 +3223,10 @@ parser_move_carefully( const char */*F*/, int /*L*/,
         if( src.field->type == FldPointer &&
             tgt.field->type == FldPointer ) {
           dialect_ok(src.loc, MfMovePointer, "MOVE POINTER");
-        }
-        if( ! is_index ) {
-          char ach[16];
-          char stype[32];
-          char dtype[32];
-          strcpy(stype, cbl_field_type_str(src.field->type));
-          strcpy(dtype, cbl_field_type_str(tgt.field->type));
-
-          if( src.field->attr & all_alpha_e )
-            {
-            strcpy(stype, "FldAlphabetic");
-            }
-          if( tgt.field->attr & all_alpha_e )
-            {
-            strcpy(dtype, "FldAlphabetic");
-            }
-          if( !(src.field->attr & scaled_e) && src.field->data.rdigits )
-            {
-            sprintf(ach, ".%d", src.field->data.rdigits);
-            strcat(stype, ach);
-            }
-          if( !(tgt.field->attr & scaled_e) && tgt.field->data.rdigits )
-            {
-            sprintf(ach, ".%d", tgt.field->data.rdigits);
-            strcat(dtype, ach);
-            }
-          error_msg(src.loc,  "cannot MOVE '%s' (%s) to '%s' (%s)",
-                    name_of(src.field), stype,
-                    name_of(tgt.field), dtype);
-          delete tgt_list;
-          return false;
+        } else {
+          error_msg(src.loc, "cannot MOVE %qs (%s) TO %qs (%s)",
+                    nice_name_of(src.field), 3 + cbl_field_type_str(src.field->type),
+                    nice_name_of(tgt.field), 3 + cbl_field_type_str(tgt.field->type));
         }
       }
     }
@@ -3291,8 +3365,8 @@ data_division_ready() {
   if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) {
     if( ! literally_one ) {
       // Use strdup so cbl_field_t::internalize can free them if need be.
-      literally_one = new_literal(xstrdup("1"));
-      literally_zero = new_literal(xstrdup("0"));
+      literally_one = new_constant(xstrdup("1"));
+      literally_zero = new_constant(xstrdup("0"));
     }
   }
 
@@ -3318,7 +3392,7 @@ anybody_redefines( const cbl_field_t *tree )
       break;
       }
     // cppcheck-suppress [unreadVariable] obviously not true
-    tree = parent_of(tree);   
+    tree = parent_of(tree);
     }
   return retval;
   }
@@ -3371,7 +3445,7 @@ procedure_division_ready( YYLTYPE loc, cbl_field_t *returning, ffi_args_t *ffi_a
     apply_cdf_turn(exception_turn);
   }
   exception_turns.clear();
-  
+
   // Start the Procedure Division.
   size_t narg = ffi_args? ffi_args->elems.size() : 0;
   std::vector <cbl_ffi_arg_t> args(narg);
@@ -3534,8 +3608,8 @@ file_section_parent_set( cbl_field_t *field ) {
     auto file = cbl_file_of(symbol_at(file_section_fd));
     auto record_area = cbl_field_of(symbol_at(file->default_record));
 
-    record_area->data.capacity = std::max(record_area->data.capacity,
-                                                field->data.capacity);
+    record_area->data.capacity( std::max(record_area->data.capacity(),
+                                         field->data.capacity()) );
 
     field->file = file_section_fd;
     const auto redefined = symbol_redefines(record_area);
@@ -3557,6 +3631,11 @@ ast_file_status_between( file_status_t lower, file_status_t upper );
 void internal_ebcdic_lock();
 void internal_ebcdic_unlock();
 
+static cbl_field_type_t
+field_binary_usage( YYLTYPE loc, cbl_field_t *field,
+                    cbl_field_type_t type, uint32_t capacity,
+                    bool signable ); 
+
 void
 ast_end_program(const char name[]  ) {
   std::for_each( symbols_begin(), symbols_end(),
@@ -3604,7 +3683,7 @@ goodnight_gracie() {
   return true;
 }
 
-// false after USE statement, to enter Declarative with EC intact. 
+// false after USE statement, to enter Declarative with EC intact.
 static bool statement_cleanup = true;
 static YYLTYPE current_location;
 
index 75b2f3c7d2a4fefbede0b8ecaddca20f53f99aa6..f730fc2dda917e42505ba427906ed756bc2633f2 100644 (file)
@@ -94,6 +94,14 @@ PICTURE   [^[:space:]]+
 DOTSEP    [.]+[[:space:]]
 DOTEOL    [[:blank:]]*[.]{BLANK_EOL}
 
+         /*
+          * "If the symbol ',' or the symbol '.' is the last symbol of
+          *  character-string-1, the PICTURE clause shall be the last clause
+          *  of the data description entry and shall be followed immediately
+          *  (without an intervening separator space) by the separator period."
+          */
+NONPIC   [,]{DOTSEP} 
+
 SKIP     [[:blank:]]*SKIP[123][[:blank:]]*[.]?{BLANK_EOL}
 TITLE    [[:blank:]]*TITLE($|[.]|[^\n]*)
 
@@ -316,6 +324,7 @@ BACKGROUND-COLOR            { return BACKGROUND_COLOR; }
 BELL                           { return BELL; }
 BINARY-ENCODING                        { return BINARY_ENCODING; }
 BLINK                          { return BLINK; }
+BYTE                           { return BYTE; }
 CAPACITY                       { return CAPACITY; }
 
 CENTER         {
@@ -1030,8 +1039,7 @@ USE({SPC}FOR)?            { return USE; }
 
   INDEX                                { return INDEX; }
   MESSAGE-TAG                  { not_implemented("USAGE type: MESSAGE-TAG"); }
-  NATIONAL                     { not_implemented("USAGE type: NATIONAL");
-                                 return NATIONAL; }
+  NATIONAL                     { return NATIONAL; }
   OBJECT{SPC}REFERENCE         { not_implemented("USAGE type: OBJECT REFERENCE"); }
 
   PACKED-DECIMAL               { return PACKED_DECIMAL; }
@@ -1099,6 +1107,7 @@ USE({SPC}FOR)?            { return USE; }
   LEADING              { return LEADING; }
   LEFT                 { return LEFT; }
   MODE                 { return MODE; }
+  NO                   { return NO; }
   OCCURS/{SPC}{NAME}   { return OCCURS; }
   OCCURS               { yy_push_state(integer_count);  return OCCURS; }
   OF                   { return OF; }
@@ -1130,6 +1139,7 @@ USE({SPC}FOR)?            { return USE; }
   VARYING              { return VARYING; }
   VOLATILE             { return VOLATILE; }
   WHEN                 { return WHEN; }
+  WITH                 { return WITH; }
 
   COPY                 {
                          yy_push_state(copy_state);
@@ -1242,7 +1252,7 @@ USE({SPC}FOR)?            { return USE; }
                          yylval.string = xstrdup(yytext); return picset(ALNUM); }
   {ALNUM}/[(]{NAME}[)] { yy_push_state(picture_count);
                          yylval.string = xstrdup(yytext); return picset(ALNUM); }
-  {ALNUM}              { yylval.string = xstrdup(yytext); return picset(ALNUM); }
+  {ALNUM}/{NONPIC}?    { yylval.string = xstrdup(yytext); return picset(ALNUM); }
 
   {ALPHED}             { yylval.string = xstrdup(yytext); return picset(ALPHED); }
   {NUMEDITED}          { yylval.string = xstrdup(yytext); return picset(NUMED); }
@@ -1830,6 +1840,7 @@ B-SHIFT-RC
   BIT-OF/{NONWORD}                     { pop_return BIT_OF; }
   BIT-TO-CHAR/{NONWORD}                        { pop_return BIT_TO_CHAR; }
   BOOLEAN-OF-INTEGER/{NONWORD}         { pop_return BOOLEAN_OF_INTEGER; }
+  BYTE/{NONWORD}                       { pop_return BYTE; }
   BYTE-LENGTH/{NONWORD}                        { pop_return BYTE_LENGTH; }
   CHAR-NATIONAL/{NONWORD}              { pop_return CHAR_NATIONAL; }
   CHAR/{NONWORD}                       { pop_return CHAR; }
index e75bb383a71c825cd7b99fa3dbb1e27be667fa39..d2db44b13038e09281deb1890a33a13871f38aa0 100644 (file)
@@ -120,6 +120,7 @@ static bool nonspace( char ch ) { return !ISSPACE(ch); }
 
 static int
 numstr_of( const char string[], radix_t radix = decimal_e ) {
+  yylval.numstr.is_float = false;
   yylval.numstr.radix = radix;
   ydflval.string = yylval.numstr.string = xstrdup(string);
   char *comma = strchr(yylval.numstr.string, ',');
@@ -185,6 +186,7 @@ numstr_of( const char string[], radix_t radix = decimal_e ) {
         return NO_CONDITION;
       }
     }
+    yylval.numstr.is_float = true;
   }
   if( 1 < std::count(input, eoinput, symbol_decimal_point()) ) {
     error_msg(yylloc, "invalid numeric literal %qs", ++p);
@@ -1165,13 +1167,13 @@ typed_name( const char name[] ) {
     {
       auto f = cbl_field_of(e);
       if( is_constant(f) ) {
-       if(  f->data.initial ) {
-         int token = cbl_figconst_tok(f->data.initial);
+       if(  f->data.original() ) {
+         int token = cbl_figconst_tok(f->data.original());
          if( token ) return token;
        }
-        int token = datetime_format_of(f->data.initial);
+        int token = datetime_format_of(f->data.original());
         if( token ) {
-          yylval.string = xstrdup(f->data.initial);
+          yylval.string = xstrdup(f->data.original());
           return token;
         }
       }
@@ -1183,7 +1185,7 @@ typed_name( const char name[] ) {
       if( type == FldLiteralN ) {
         yylval.numstr.radix =
           f->has_attr(hex_encoded_e)? hexadecimal_e : decimal_e;
-        yylval.numstr.string = xstrdup(f->data.initial);
+        yylval.numstr.string = xstrdup(f->data.original());
         return NUMSTR;
       }
       if( !f->has_attr(record_key_e) ) { // not a key-name literal
index a5f1467dfaa18cdccda501eb8ac20100a1084b54..3b4adad180adfd4b76d7774b32ae6a7ce44113db 100644 (file)
@@ -117,7 +117,7 @@ extern bool cursor_at_sol;
                 fprintf(stderr, "%s", (b)->name); \
                 if( (b)->type == FldLiteralA || (b)->type == FldLiteralN ) \
                     { \
-                    fprintf(stderr, " \"%s\"", (b)->data.initial); \
+                    fprintf(stderr, " \"%s\"", (b)->data.original()); \
                     } \
                 else \
                     { \
@@ -143,8 +143,8 @@ extern bool cursor_at_sol;
                     size_t nbytes; \
                     const char *literal = __gg__iconverter((b).field->codeset.encoding, \
                                                            DEFAULT_SOURCE_ENCODING, \
-                                                           (b).field->data.initial, \
-                                                           strlen((b).field->data.initial), \
+                                                           (b).field->data.original(), \
+                                                           strlen((b).field->data.original()), \
                                                            &nbytes); \
                     fprintf(stderr, " \"%s\"", literal); \
                     } \
@@ -334,7 +334,7 @@ extern bool cursor_at_sol;
       else if( b->type == FldLiteralN ) \
         { \
         gg_fprintf(trace_handle, 1, " attr 0x%lx",  build_int_cst_type(SIZE_T, b->attr)); \
-        gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, b->data.capacity)); \
+        gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, b->data.capacity())); \
         gg_fprintf(trace_handle, 1, ":%ld",         build_int_cst_type(SIZE_T, b->offset)); \
         gg_fprintf(trace_handle, 1, ":%d",          build_int_cst_type(INT,    b->data.digits)); \
         gg_fprintf(trace_handle, 1, ":%d",         build_int_cst_type(INT,    b->data.rdigits)); \
@@ -400,7 +400,7 @@ extern bool cursor_at_sol;
       else if( (b).field->type == FldLiteralN ) \
         { \
         gg_fprintf(trace_handle, 1, " attr 0x%lx",  build_int_cst_type(SIZE_T, (b).field->attr)); \
-        gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, (b).field->data.capacity)); \
+        gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, (b).field->data.capacity())); \
         gg_fprintf(trace_handle, 1, ":%ld",         build_int_cst_type(SIZE_T, (b).field->offset)); \
         gg_fprintf(trace_handle, 1, ":%d",          build_int_cst_type(INT,    (b).field->data.digits)); \
         gg_fprintf(trace_handle, 1, ":%d)",         build_int_cst_type(INT,    (b).field->data.rdigits)); \
index 2b13b1fa0a623e20d5ea22996f4385b2c8ba9655..37875d22ae0440184320f3b1cf0aea5a280f9676 100644 (file)
@@ -216,15 +216,15 @@ create_cblc_file_t()
     // When doing FILE I/O, you need the cblc_file_t structure
 
     /*
-typedef struct cblc_file_t
+typedef struct cblc_file_t*
     {
     char                *name;             // This is the name of the structure; might be the name of an environment variable
-    size_t               symbol_index;     // The symbol table index of the related cbl_file_t structure
+    uint64_t             symbol_index;     // The symbol table index of the related cbl_file_t structure
     char                *filename;         // The name of the file to be opened
     FILE                *file_pointer;     // The FILE *pointer
     cblc_field_t        *default_record;   // The record_area
-    size_t               record_area_min;  // The size of the smallest 01 record in the FD
-    size_t               record_area_max;  // The size of the largest  01 record in the FD
+    size_t               record_area_min;  // The size of the smallest 01 record in the FD, in characters
+    size_t               record_area_max;  // The size of the largest  01 record in the FD, in characters
     cblc_field_t       **keys;             // For relative and indexed files.  The first is the primary key. Null-terminated.
     int                 *key_numbers;      // One per key -- each key has a number. This table is key_number + 1
     int                 *uniques;          // One per key
@@ -243,7 +243,8 @@ typedef struct cblc_file_t
     int                  errnum;           // most recent errno; can't reuse "errno" as the name
     file_status_t        io_status;        // See 2014 standard, section 9.1.12
     int                  padding;          // Actually a char
-    int                  delimiter;        // ends a record; defaults to '\n'.
+    cbl_char_t           delimiter;        // ends a record; defaults to '\n'.
+    int                  stride();         // width of a character
     int                  flags;            // cblc_file_flags_t
     int                  recent_char;      // This is the most recent char sent to the file
     int                  recent_key;
@@ -258,7 +259,7 @@ typedef struct cblc_file_t
     retval = gg_get_filelevel_struct_type_decl( "cblc_file_t",
                                             33,
                                             CHAR_P,    "name",
-                                            SIZE_T,    "symbol_table_index",
+                                            ULONGLONG, "symbol_table_index",
                                             CHAR_P,    "filename",
                                             FILE_P,    "file_pointer",
                                             cblc_field_p_type_node, "default_record",
@@ -282,9 +283,10 @@ typedef struct cblc_file_t
                                             INT,       "errnum",
                                             INT,       "io_status",
                                             INT,       "padding",
-                                            INT,       "delimiter",
+                                            UINT,      "delimiter",
+                                            INT,       "stride",
                                             INT,       "flags",
-                                            INT,       "recent_char",
+                                            UINT,      "recent_char",
                                             INT,       "recent_key",
                                             INT,       "prior_op",
                                             INT,       "encoding", // Actually cbl_encoding_t
index a177fcdfa62be60b421c7a871d01748f3bddbf9a..a94ef8bddfa84b1ebd0a1dd52123eec6220ac804 100644 (file)
@@ -162,7 +162,7 @@ symbol_table_extend() {
 
   if( 0 != ftruncate(symbols.fd, len) ) {
     cbl_err( "%s:%d: could not extend symbol table to %lu elements",
-            __func__, __LINE__, gb4(symbols.capacity));
+             __func__, __LINE__, gb4(symbols.capacity));
   }
 
   /*
@@ -263,6 +263,11 @@ cbl_ffi_arg_t( cbl_ffi_crv_t crv,
   error_msg(loc, __VA_ARGS__);                                \
  } while(0)
 
+#define WARNING_FIELD(F, ...)                                \
+ do{                                                        \
+  auto loc = symbol_field_location(field_index(F));        \
+  warn_msg(loc, __VA_ARGS__);                                \
+ } while(0)
 
 static const struct cbl_occurs_t nonarray = cbl_occurs_t();
 
@@ -278,22 +283,18 @@ class group_size_t {
  public:
   group_size_t() : size(0) {}
   group_size_t& operator+( const cbl_field_t& field ) {
-    size += field.data.capacity;
+    size += field.data.capacity();
     return *this;
   }
   size_t capacity() const { return size; }
 };
 
-enum  { constq = constant_e | quoted_e };
+#define constq (constant_e | quoted_e)
 
 static symbol_elem_t
 elementize( const cbl_field_t& field ) {
   symbol_elem_t sym (SymField);
   sym.elem.field = field;
-  // Dubner did the following because he didn't feel like creating yet another
-  // cbl_field_t constructor that included the hardcoded encoding for the
-  // global special registers.
-  sym.elem.field.codeset.set();
   return sym;
 }
 
@@ -747,7 +748,7 @@ symbol_explicitly_redefines( const cbl_field_t *field ) {
 static uint32_t
 field_size( const struct cbl_field_t *field ) {
   size_t n = field->occurs.ntimes();
-  return field->data.capacity * (n > 0? n : 1);
+  return field->data.capacity() * (n > 0? n : 1);
 }
 
 const char *
@@ -807,28 +808,18 @@ cbl_field_t::size() const {
 
 uint64_t
 cbl_field_t::set_attr( cbl_field_attr_t attr ) {
-  if( attr == signable_e ) {
-    if( ! has_attr(attr) && this->var_decl_node != NULL ) {
-      parser_field_attr_set(this, attr);
-    }
-  }
   return this->attr |= uint64_t(attr);
 }
 
 uint64_t
 cbl_field_t::clear_attr( cbl_field_attr_t attr ) {
-  if( attr == signable_e ) {
-    if( this->var_decl_node != nullptr && has_attr(attr) ) {
-      parser_field_attr_set(this, attr, false);
-    }
-  }
   return this->attr &= ~uint64_t(attr);
 }
 
 static uint32_t
 field_memsize( const struct cbl_field_t *field ) {
   uint32_t n = field->occurs.ntimes();
-  n = field->data.capacity * (n > 0? n : 1);
+  n = field->data.capacity() * (n > 0? n : 1);
   return std::max(n, field->data.memsize);
 }
 
@@ -1032,6 +1023,11 @@ symbol_find_odo( const cbl_field_t * field ) {
 static inline bool
 is_index( const cbl_field_type_t type ) { return type == FldIndex; }
 
+static inline const char *
+cbl_encoding_str( cbl_encoding_t encoding ) {
+  return __gg__encoding_iconv_name(encoding);
+}
+
 static size_t
 symbols_dump( size_t first, bool header ) {
   size_t ninvalid = 0;
@@ -1044,7 +1040,7 @@ symbols_dump( size_t first, bool header ) {
   }
 
   for( struct symbol_elem_t *e = symbols_begin(first); e < symbols_end(); e++ ) {
-    char *s;
+    char *s = nullptr;
 
     switch(e->type) {
     case SymFilename:
@@ -1115,7 +1111,15 @@ symbols_dump( size_t first, bool header ) {
         free(part);
       }
       break;
-    default:
+    case SymLocale:
+      s = xasprintf("%4" GCC_PRISZ "u %-18s %s %s collation %s", (fmt_size_t)e->program,
+                    "Locale",
+                    e->elem.locale.name,
+                    cbl_encoding_str(e->elem.locale.encoding),
+                    e->elem.locale.collation );
+      break;
+    }
+    if( ! s ) {
       dbgmsg("%s: cannot dump symbol type %d", __func__, e->type);
       continue;
     }
@@ -1181,9 +1185,9 @@ static struct symbol_elem_t *
 
   if( is_elementary(group->type) ) { // "group" is in fact just a field
     if( is_record_area(group) ) {
-      if( group->data.capacity == 0 ) {
+      if( group->data.capacity() == 0 ) {
         const auto& file = *cbl_file_of(symbol_at(group->file));
-        group->data.capacity = file.varying_size.max;
+        group->data.capacity(  file.varying_size.max );
       }
 
       // Find 01s for the file that is not a record area field.
@@ -1204,8 +1208,8 @@ static struct symbol_elem_t *
           assert(record->level == 1);
           e = calculate_capacity(p);
           auto record_size = std::max(record->data.memsize,
-                                      record->data.capacity);
-          group->data.capacity = std::max(group->data.capacity, record_size);
+                                      record->data.capacity());
+          group->data.capacity( std::max(group->data.capacity(), record_size) );
         }
       }
 
@@ -1213,8 +1217,8 @@ static struct symbol_elem_t *
       // Reach back to that symbol to set its capacity, if need be.
       auto area = symbol_redefines(group);
       if( area ) {
-        area->data.capacity = std::max(area->data.capacity,
-                                      group->data.capacity);
+        area->data.capacity(  std::max(area->data.capacity(),
+                                       group->data.capacity()) );
       }
 
       return e; // no 01, return self
@@ -1224,7 +1228,7 @@ static struct symbol_elem_t *
 
     if( redefined ) {
       redefined->data.memsize = std::max(field_memsize(redefined), field_size(group));
-      if( redefined->data.memsize == redefined->data.capacity ) {
+      if( redefined->data.memsize == redefined->data.capacity() ) {
         redefined->data.memsize = 0;
       }
     }
@@ -1240,7 +1244,7 @@ static struct symbol_elem_t *
 
   assert(group->type == FldGroup);
 
-  group->data.capacity = 0;
+  group->data.capacity(0);
 
   std::list<cbl_field_t*> members;
 
@@ -1278,20 +1282,20 @@ static struct symbol_elem_t *
 
       field->data.memsize = 0;
 
-      if( redefined->data.memsize == redefined->data.capacity ) {
+      if( redefined->data.memsize == redefined->data.capacity() ) {
         redefined->data.memsize = 0;
       }
       continue;
     }
-    group->data.capacity += field_size(field);
+    group->data.add_capacity( field_size(field) );
     group->data.memsize += field_memsize(field);
 
     // If group has a parent that is a record area, expand it, too.
     if( 0 < group->parent ) {
       redefined = symbol_redefines(group);
       if( redefined && is_record_area(redefined) ) {
-        if( redefined->data.capacity < group->data.memsize ) {
-          redefined->data.capacity = group->data.memsize;
+        if( redefined->data.capacity() < group->data.memsize ) {
+          redefined->data.capacity( group->data.memsize );
         }
       }
     }
@@ -1303,16 +1307,16 @@ static struct symbol_elem_t *
   }
 
   group->data.memsize = std::max(max_memsize, group->data.memsize);
-  if( group->data.memsize == group->data.capacity ) group->data.memsize = 0;
+  if( group->data.memsize == group->data.capacity() ) group->data.memsize = 0;
 
-  if( 0 < group->data.memsize && group->data.memsize < group->data.capacity ) {
+  if( 0 < group->data.memsize && group->data.memsize < group->data.capacity() ) {
     if( yydebug ) {
       dbgmsg( "%s:%d: small capacity?\n\t%s", __func__, __LINE__, field_str(group) );
     }
-    group->data.memsize = group->data.capacity;
+    group->data.memsize = group->data.capacity();
   }
 
-  if( group->data.capacity == 0 ) {
+  if( group->data.capacity() == 0 ) {
     dbgmsg( "%s:%d: zero capacity?\n\t%s", __func__, __LINE__, field_str(group) );
   }
 
@@ -1372,15 +1376,15 @@ is_variable_length( const cbl_field_t *field ) {
   // RENAMES may be included in end_of_group.
   size_t isym = field_index(field),  esym = end_of_group(isym);
   bool odo = std::any_of( symbol_at(isym) + 1, symbol_at_impl(esym),
-                         [field]( const auto& elem ) {
-                           if( elem.type == SymField ) {
-                             auto f = cbl_field_of(&elem);
-                             if( field->level < f->level ) { // exclude RENAMES
-                               return 0 < f->occurs.depending_on;
-                             }
-                           }
-                           return false;
-                         } );
+                          [field]( const auto& elem ) {
+                            if( elem.type == SymField ) {
+                              auto f = cbl_field_of(&elem);
+                              if( field->level < f->level ) { // exclude RENAMES
+                                return 0 < f->occurs.depending_on;
+                              }
+                            }
+                            return false;
+                          } );
   return odo;
 }
 
@@ -1511,37 +1515,16 @@ field_str( const cbl_field_t *field ) {
   if( 'r' == parredef && field->level == 0 ) parredef = 'p';
   if( field->has_attr(typedef_e) ) parredef = 'T';
 
-  const char *init = field->data.initial? field->data.initial : NULL;
+  const char *init = field->data.original();
   if( init ) {
     auto fig = cbl_figconst_of(init);
     if( normal_value_e != fig ) {
       init = cbl_figconst_str(fig);
     } else {
-#if 0
-    // At this point, we might have to convert 'init' back to ASCII
-      char *false_init = static_cast<char *>(xmalloc(field->init.capacity+1));
-      memcpy(false_init, field->init.initial, field->data.capacity);
-      false_data[field->data.capacity] = '\0';
-      size_t charsout;
-
-      cbl_encoding_t enc_from = field->codeset.encoding;
-      if( field->type == FldNumericDisplay )
-        {
-        // Apparently we need to trace back the meaning of data.literal for
-        // field::type == FldNumericDisplay
-        enc_from = DEFAULT_SOURCE_ENCODING;
-        }
-
-      init = __gg__iconverter(enc_from,
-                              DEFAULT_SOURCE_ENCODING,
-                              false_data,
-                              field->data.capacity,
-                              &charsout);
-#endif
-      auto eoinit = init + strlen(init);
       char *s = xasprintf("'%s'", init);
 
       // No NUL within the initial data.
+      auto eoinit = init + strlen(init);
       auto ok = std::none_of( init, eoinit,
                               []( char ch ) { return ch == '\0'; } );
       assert(ok);
@@ -1549,7 +1532,7 @@ field_str( const cbl_field_t *field ) {
       // If any of the init are unprintable, provide a hex version.
       if( ! std::all_of(init, eoinit, fisprint) ) {
         if( is_elementary(field->type) && field->type != FldPointer ) {
-          const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
+          const size_t len = strlen(s) + 8 + 2 * strlen(init);
           s = reinterpret_cast<char*>(xrealloc(s, len));
           strcat( s, " (0x" );
           char *p = s + strlen(s);
@@ -1601,7 +1584,7 @@ field_str( const cbl_field_t *field ) {
                    (field->attr & external_e)? 'E' : 0x20,
                    storage_type,
                    field->data.memsize,
-                   field->data.capacity, field->data.digits, field->data.rdigits,
+                   field->data.capacity(), field->data.digits, field->data.rdigits,
                    init, field->attr_str(attrs), field->line );
   return string;
 }
@@ -1647,8 +1630,9 @@ extend_66_capacity( cbl_field_t *alias ) {
   } else {
     ++e2;
   }
-  alias->data.capacity = std::for_each(e, e2, cap).capacity;
-  assert(alias->data.capacity > 0);
+  cap = std::for_each(e, e2, cap);
+  alias->data.capacity( cap.capacity );
+  assert(alias->data.capacity() > 0);
 }
 
 bool
@@ -1721,7 +1705,7 @@ operator<<( std::ostream& os, const cbl_occurs_bounds_t& bound ) {
 static std::ostream&
 operator<<( std::ostream& os, const cbl_field_data_t& field ) {
   return os << field.memsize << ','
-            << field.capacity << ','
+            << field.capacity() << ','
             << field.digits << ','
             << field.rdigits << ','
             << (field.picture? field.picture : "");
@@ -1857,12 +1841,11 @@ symbols_update( size_t first, bool parsed_ok ) {
                   field->line, field->level_str(), field->name);
 
         } else {
-          dbgmsg("%s: error: data item %s #" HOST_SIZE_T_PRINT_UNSIGNED
+          cbl_internal_error("%s: data item %s #" HOST_SIZE_T_PRINT_UNSIGNED
                  " '%s' capacity %u rejected",
                    __func__,
                    3 + cbl_field_type_str(field->type),
-                   (fmt_size_t)isym, field->name, field->data.capacity);
-          gcc_unreachable();
+                   (fmt_size_t)isym, field->name, field->data.capacity());
         }
       }
       return 0;
@@ -1879,7 +1862,7 @@ symbols_update( size_t first, bool parsed_ok ) {
   // A shared record area has no 01 child because that child redefines its parent.
   for( auto sharer : shared_record_areas ) {
     auto redefined = cbl_field_of(symbol_at(sharer->parent));
-    sharer->data.capacity = redefined->data.capacity;
+    sharer->data.capacity( redefined->data.capacity() );
   }
 
   for( p = symbols_begin(first); p < symbols_end(); p++ ) {
@@ -1894,8 +1877,8 @@ symbols_update( size_t first, bool parsed_ok ) {
     if( field->level != 0 && field->has_attr(constant_e) ) {
       auto fig = cbl_figconst_field_of(field->data.initial);
       if( fig ) {
-       field->var_decl_node = fig->var_decl_node;
-       continue;
+        field->var_decl_node = fig->var_decl_node;
+        continue;
       }
     }
 
@@ -1920,7 +1903,7 @@ symbols_update( size_t first, bool parsed_ok ) {
       continue;
     }
     if( is_numeric(field) && ! field->has_attr(constant_e) ) {
-      if( field->data.capacity == 0 ) {
+      if( field->data.capacity() == 0 ) {
         ERROR_FIELD(field, "numeric %qs has USAGE that requires PICTURE %s",
                     field->name, field->data.initial);
       }
@@ -1934,7 +1917,6 @@ symbols_update( size_t first, bool parsed_ok ) {
           gcc_unreachable();
         case FldAlphaEdited:
         case FldAlphanumeric:
-        case FldClass:
         case FldDisplay:
         case FldGroup:
         case FldLiteralA:
@@ -1946,6 +1928,7 @@ symbols_update( size_t first, bool parsed_ok ) {
                       "internal: %qs encoding not defined", field->name);
           }
           break;
+        case FldClass:
         case FldConditional:
         case FldFloat:
         case FldIndex:
@@ -2191,7 +2174,7 @@ symbol_field_parent_set( cbl_field_t *field )
         if( ! prior->codeset.set() ) { // needs attention
           dbgmsg("'%s' is already National", prior->name);
         }
-        field->attr |= numeric_group_attrs(prior);
+        field->attr |= numeric_group_attrs(field);
       }
       // verify level 88 domain value
       if( is_numeric(prior) && field->level == 88 ) {
@@ -2256,6 +2239,7 @@ const std::list<cbl_field_t> cdf_literalize();
  */
 void
 symbol_table_init(void) {
+  const static cbl_field_t::codeset_t cp1252(CP1252_e);
   assert(symbols.fd == -1);
   assert(symbols.nelem == 0);
 
@@ -2270,33 +2254,33 @@ symbol_table_init(void) {
 
   // These should match the definitions in libgcobol/constants.cc
   static cbl_field_t constants[] = {
-    { FldAlphanumeric, space_value_e | int(constq) | register_e,
-      {1,1,0,0, " \0\xFF"}, 0, "SPACE" },
-    { FldAlphanumeric, space_value_e | int(constq) | register_e,
-      {1,1,0,0, " \0\xFF"}, 0, "SPACES" },
-    { FldAlphanumeric, low_value_e | int(constq) | register_e,
-      {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES" },
-    { FldAlphanumeric, zero_value_e | int(constq) | register_e,
-      {1,1,0,0, "0"}, 0, "ZEROS" },
-    { FldAlphanumeric, high_value_e | int(constq) | register_e,
-      {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES" },
+    { FldAlphanumeric, SPACE_VALUE_E | constq | register_e,
+      {1,1,0,0, " \0\xFF"}, 0, "SPACE", cp1252 },
+    { FldAlphanumeric, SPACE_VALUE_E | constq | register_e,
+      {1,1,0,0, " \0\xFF"}, 0, "SPACES", cp1252 },
+    { FldAlphanumeric, LOW_VALUE_E | constq | register_e,
+      {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES", cp1252 },
+    { FldAlphanumeric, ZERO_VALUE_E | constq | register_e,
+      {1,1,0,0, "0"}, 0, "ZEROS", cp1252 },
+    { FldAlphanumeric, HIGH_VALUE_E | constq | register_e,
+      {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES", cp1252 },
     // IBM standard: QUOTE is a double-quote unless APOST compiler option
-    { FldAlphanumeric, quote_value_e | int(constq) | register_e ,
-      {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES" },
-    { FldPointer, int(constq) | register_e ,
-      {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS" },
+    { FldAlphanumeric, QUOTE_VALUE_E | constq | register_e ,
+      {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES", cp1252 },
+    { FldPointer, constq | register_e ,
+      {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS", cp1252 },
     // IBM defines TALLY
     // 01  TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO.
     { FldNumericBin5, signable_e | register_e,
-      {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY" },
+      {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY", cp1252 },
     // 01  ARGI is the current index into the argv array
     { FldNumericBin5, signable_e | register_e,
-      {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI" },
+      {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI", cp1252 },
 
     // These last two don't require actual storage; they get BOOL var_decl_node
     // in parser_symbol_add()
-    { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE" },
-    { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE" },
+    { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE", cp1252 },
+    { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE", cp1252 },
   };
   for( struct cbl_field_t *f = constants;
        f < constants + COUNT_OF(constants); f++ ) {
@@ -2373,29 +2357,29 @@ symbol_table_init(void) {
 
   static cbl_field_t debug_registers[] = {
     { FldGroup, register_e,
-      {132,132,0,0, NULL}, 1, "DEBUG-ITEM" },
+      {132,132,0,0, NULL}, 1, "DEBUG-ITEM", cp1252 },
     { FldAlphanumeric, register_e,
-      {6,6,0,0, "      "}, 2, "DEBUG-LINE" },
+      {6,6,0,0, "      "}, 2, "DEBUG-LINE", cp1252 },
     { FldAlphanumeric, register_e|filler_e,
-      {1,1,0,0, " "},      2, "FILLER" },
+      {1,1,0,0, " "},      2, "FILLER", cp1252 },
     { FldAlphanumeric, register_e,
-      {30,30,0,0, NULL},   2, "DEBUG-NAME" },
+      {30,30,0,0, NULL},   2, "DEBUG-NAME", cp1252 },
     { FldAlphanumeric, register_e|filler_e,
-      {1,1,0,0, " "},      2, "FILLER" },
+      {1,1,0,0, " "},      2, "FILLER", cp1252 },
     { FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
-      {5,5,4,0, NULL},     2, "DEBUG-SUB-1" },
+      {5,5,4,0, NULL},     2, "DEBUG-SUB-1", cp1252 },
     { FldAlphanumeric, register_e|filler_e,
-      {1,1,0,0, " "},      2, "FILLER" },
+      {1,1,0,0, " "},      2, "FILLER", cp1252 },
     { FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
-      {5,5,4,0, NULL},     2, "DEBUG-SUB-2" },
+      {5,5,4,0, NULL},     2, "DEBUG-SUB-2", cp1252 },
     { FldAlphanumeric, register_e|filler_e,
-      {1,1,0,0, " "},      2, "FILLER" },
+      {1,1,0,0, " "},      2, "FILLER", cp1252 },
     { FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
-      {5,5,4,0, NULL},     2, "DEBUG-SUB-3" },
+      {5,5,4,0, NULL},     2, "DEBUG-SUB-3", cp1252 },
     { FldAlphanumeric, register_e | filler_e,
-      {1,1,0,0, " "},      2, "FILLER" },
+      {1,1,0,0, " "},      2, "FILLER", cp1252 },
     { FldAlphanumeric, signable_e | register_e,
-      {76,76,0,0, NULL},   2, "DEBUG-CONTENTS" },
+      {76,76,0,0, NULL},   2, "DEBUG-CONTENTS", cp1252 },
   };
 
   // debug registers
@@ -2405,7 +2389,7 @@ symbol_table_init(void) {
     std::accumulate(debug_registers,
                     debug_registers + COUNT_OF(debug_registers), group_size_t());
   debug_registers[0].data.memsize =
-  debug_registers[0].data.capacity = group_size.capacity();
+  debug_registers[0].data.capacity( group_size.capacity() );
 
   auto debug_start = p = table.elems + table.nelem;
   p = std::transform(debug_registers,
@@ -2416,14 +2400,14 @@ symbol_table_init(void) {
 
   // special registers
   static cbl_field_t special_registers[] = {
-    { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS" },
-    { FldNumericBin5,    register_e, {2,2,4,0, NULL}, 0, "UPSI-0" },
-    { FldNumericBin5,    signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE" },
-    { FldNumericBin5,    register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER" },
-    { FldLiteralA,        register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin" },
-    { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout" },
-    { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr" },
-    { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/null"},   0, "_dev_null" },
+    { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS", cp1252 },
+    { FldNumericBin5,    register_e, {2,2,4,0, NULL}, 0, "UPSI-0", cp1252 },
+    { FldNumericBin5,    signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE", cp1252 },
+    { FldNumericBin5,    register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER", cp1252 },
+    { FldLiteralA,        register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin", cp1252 },
+    { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout", cp1252 },
+    { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr", cp1252 },
+    { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/null"},   0, "_dev_null", cp1252 },
   };
 
   assert(table.nelem + COUNT_OF(special_registers) < table.capacity);
@@ -2435,17 +2419,18 @@ symbol_table_init(void) {
   table.nelem = p - table.elems;
   assert(table.nelem < table.capacity);
 
+  const static auto reg_based_any = cbl_field_attr_t(register_e | based_e | any_length_e);
   // xml registers
   static cbl_field_t xml_registers[] = {
-    { FldNumericBin5,  register_e, {4,4,9,0, "0"}, 1, "XML-CODE" },
-    { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT" },
-    { FldNumericBin5,  register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION" },
-    { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE" },
-    { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE" },
-    { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX" },
-    { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX" },
-    { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-TEXT" },
-    { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NTEXT" },
+    { FldNumericBin5,  register_e, {4,4,9,0, "0"}, 1, "XML-CODE", cp1252 },
+    { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT", cp1252 },
+    { FldNumericBin5,  register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION", cp1252 },
+    { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE", cp1252 },
+    { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE", cp1252 },
+    { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX", cp1252 },
+    { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX", cp1252 },
+    { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-TEXT", cp1252 },
+    { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NTEXT", cp1252 },
   }, * const eoxml = xml_registers + COUNT_OF(xml_registers);
 
   assert(table.nelem + COUNT_OF(xml_registers) < table.capacity);
@@ -2455,17 +2440,28 @@ symbol_table_init(void) {
   table.nelem = p - table.elems;
   assert(table.nelem < table.capacity);
 
-  // Add any CDF values already defined as literals.
-  // After symbols are ready, the CDF adds them directly. 
+  // Add any CDF values defined on the command line. 
+  // After symbols are ready, the CDF adds them directly.
   const std::list<cbl_field_t> cdf_values = cdf_literalize();
+  auto icdf = table.nelem;
   table.nelem += cdf_values.size();
   assert(table.nelem < table.capacity);
-  
+  auto ecdf = table.nelem;
+
   p = std::transform(cdf_values.begin(), cdf_values.end(), p, elementize);
-  
+
   // Initialize symbol table.
   symbols = table;
 
+  // Encode CDF literals.  Could not be done previously because encoding checks
+  // against figurative constants in the symbol table.
+  for( auto i=icdf; i < ecdf; i++ ) {
+    auto& f = symbols.elems[i].elem.field;
+    if( f.type == FldLiteralA ) {
+      f.set_initial(cbl_loc_t());
+    }
+  }
+  
   for( auto e = symbols.elems; e < symbols.elems + symbols.nelem; e++ ) {
     if( e->type == SymField ) {
       update_symbol_map2(e);
@@ -2595,12 +2591,10 @@ numeric_group_attrs( const cbl_field_t *field ) {
   static const uint64_t inherit = signable_e | leading_e | separate_e | big_endian_e;
   static_assert(sizeof(cbl_field_t::type) < sizeof(inherit), "need bigger type");
   assert(field);
-  if( field->type == FldNumericDisplay || field->type == FldGroup ) {
-    if( field->parent > 0 && symbol_at(field->parent)->type == SymField ) {
-      cbl_field_t *parent = parent_of(field);
-      assert(parent);
-      return inherit & parent->attr;
-    }
+  if( field->parent > 0 && symbol_at(field->parent)->type == SymField ) {
+    cbl_field_t *parent = parent_of(field);
+    assert(parent);
+    return inherit & parent->attr;
   }
   return 0;
 }
@@ -2678,7 +2672,7 @@ symbol_field_add( size_t program, struct cbl_field_t *field )
       field->codeset = parent->codeset;
     }
     // BINARY-LONG, for example, sets capacity.
-    if( is_numeric(parent->usage) && parent->data.capacity > 0 ) {
+    if( is_numeric(parent->usage) && parent->data.capacity() > 0 ) {
       field->type = parent->usage;
       field->data = parent->data;
       field->data = 0;  // cppcheck-suppress redundantAssignment
@@ -2923,7 +2917,7 @@ struct symbol_elem_t *
 symbol_field_alias( struct symbol_elem_t *e, const char name[] )
 {
   cbl_field_t alias = *cbl_field_of(e);
-  cbl_field_data_t data = { alias.data.memsize, alias.data.capacity };
+  cbl_field_data_t data = { alias.data.memsize, alias.data.capacity() };
   alias.data = data;
   alias.data.memsize = 0;
 
@@ -3139,7 +3133,7 @@ class is_section {
 
 
 static bool fd_record_size_cmp( const symbol_elem_t& a, const symbol_elem_t& b ) {
-  return cbl_field_of(&a)->data.capacity < cbl_field_of(&b)->data.capacity;
+  return cbl_field_of(&a)->data.capacity() < cbl_field_of(&b)->data.capacity();
 }
 
 /*
@@ -3195,8 +3189,8 @@ symbol_file_record_sizes( struct cbl_file_t *file ) {
   // Make a copy, update the sizes, and return it.
   cbl_file_t::varying_t output = file->varying_size;
 
-  output.min = cbl_field_of(&*p.first)->data.capacity;
-  output.max = cbl_field_of(&*p.second)->data.capacity;
+  output.min = cbl_field_of(&*p.first)->data.capacity();
+  output.max = cbl_field_of(&*p.second)->data.capacity();
 
   assert(output.min > 0 && "min record size is 0");
   assert(output.min <= output.max);
@@ -3313,6 +3307,10 @@ cbl_alphabet_t::reencode()  {
               __gg__encoding_iconv_name(current_encoding(display_encoding_e));
   iconv_t cd = iconv_open(tocode, fromcode);
 
+  const charmap_t *charmap_disp =
+              __gg__get_charmap(current_encoding(display_encoding_e));
+  size_t stride = charmap_disp->stride();
+
 #if optimal_reencode
   if( fromcode == tocode ) { // semantically
     tgt.resize(0);
@@ -3348,7 +3346,7 @@ cbl_alphabet_t::reencode()  {
     }
     assert(outbytesleft < sizeof(pos));
     n = sizeof(pos) - outbytesleft;
-    if( 1 < n ) {
+    if( stride < n ) {
       error_msg(loc, "%s character '%c' (%x hex) requires %zu bytes as %s",
                 fromcode, ch, ch, n, tocode);
       continue;
@@ -3426,6 +3424,10 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
 using std::deque;
 static deque<cbl_field_t*> stack;
 
+/*
+ * Allocate a temporary field. Assign the type and name, if supplied.  Caller
+ * deals with encoding and initial value.
+ */
 static cbl_field_t *
 new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr )
 {
@@ -3531,44 +3533,48 @@ new_literal_add( const char initial[], uint32_t len,
     field = new_temporary_impl(FldLiteralA);
     field->attr |= attr;
 
-    if(len == 0)
-      {
-      // This will cover UTF-32, should that arise.
-      size_t nbytes = 4;
-      char *init = static_cast<char *>(xmalloc(nbytes));
-      memset(init, 0, nbytes);
-      field->data.initial = init;
-      }
-    if(len)
-      {
-      char *init = static_cast<char *>(xmalloc(len+4));
-      memcpy(init, initial, len);
-      memset(init+len, 0, 4);
-      field->data.initial = init;
-      }
-    field->data.capacity = len;
+    char *orig = static_cast<char *>(xmalloc(len+4));
+    
+    auto p = initial? std::copy(initial, initial + len, orig) : orig;
+    std::fill(p, p+4, 0);
+    field->data.original(orig);
+    field->data.capacity(len); // in case of Z-string
     }
 
+  assert(field->name[0] != '\0'); // new_temporary_impl sets literal names
+
   if( ! field->has_attr(hex_encoded_e) ) {
     // If the literal bore a prefix, set the encoding,
-    if( encoding != cbl_field_t::codeset_t::source_encoding->type ) {
+    if( encoding != cbl_field_t::codeset_t::default_encodings.source->type ) {
       field->codeset.set(encoding);
+    } else {
+      field->codeset.set();
     }
-    field->internalize();
   }
 
-  static size_t literal_count = 1;
-  sprintf(field->name,
-          "%s%c_" HOST_SIZE_T_PRINT_DEC,
-          "_literal",
-          field->type == FldLiteralA ? 'a' : 'n',
-          (fmt_size_t)literal_count++);
+  if( field->type == FldLiteralN ) {
+    field->set_initial(cbl_loc_t());
+  } else {
+    field->set_initial(len);
+  }
 
   return parser_symbol_add2(field);
 }
 
 static temporaries_t temporaries;
 
+void
+symbol_temporary_location( const cbl_field_t *field, const cbl_loc_t& loc ) {
+  temporaries.locs[field] = loc;
+}
+
+cbl_loc_t
+symbol_temporary_location( const cbl_field_t *field ) {
+  extern YYLTYPE yylloc;
+  auto p = temporaries.locs.find(field);
+  return p == temporaries.locs.end()? cbl_loc_t(yylloc) : p->second;
+}
+
 cbl_field_t *
 temporaries_t::literal( uint32_t len, const char value[],
                         cbl_field_attr_t attr, cbl_encoding_t encoding ) {
@@ -3586,6 +3592,19 @@ temporaries_t::literal( uint32_t len, const char value[],
   return literals[key] = new_literal_add(value, len, attr, encoding);
 }
 
+cbl_field_t *
+new_literal_2( uint32_t len, const char initial[],
+             cbl_field_attr_t attr, cbl_encoding_t encoding ) {
+  encoding = current_encoding('A');
+  const charmap_t *charmap = __gg__get_charmap(encoding);
+  cbl_field_t *retval = temporaries.literal(len*charmap->stride(),
+                                            initial,
+                                            attr,
+                                            encoding);
+  return retval;
+}
+
+
 cbl_field_t *
 new_literal( uint32_t len, const char initial[],
              cbl_field_attr_t attr, cbl_encoding_t encoding ) {
@@ -3681,30 +3700,82 @@ symbol_temporaries_free() {
 }
 
 cbl_field_t *
-new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
+new_alphanumeric( size_t capacity, const cbl_name_t name, cbl_encoding_t encoding ) {
   cbl_field_t * field = new_temporary_impl(FldAlphanumeric, name);
-  field->data.capacity = capacity;
+  field->set_capacity( capacity );
+  if( encoding != no_encoding_e ) {
+    field->codeset.set(encoding);
+  }
+  //// Dubner hacking away:  If name is non-null, then assume this is a
+  //// function definition, and force the codeset, which otherwise will have
+  //// defaulted to current_encoding('A'), and the valid() test in codeset.set
+  //// will have prevented it from being changed.
+  if( name && encoding != no_encoding_e ) {
+    field->codeset.set_explicit(encoding);
+  }
   temporaries.add(field);
   return parser_symbol_add2(field);
 }
 
 extern os_locale_t os_locale;
 
-const encodings_t cbl_field_t::codeset_t::source_encodings[2] = {
-  { false, iconv_UTF_8_e, "UTF-8" },
-  { true,  iconv_CP1252_e, "CP1252" },
-};
-const encodings_t * cbl_field_t::codeset_t::source_encoding = {
-  cbl_field_t::codeset_t::source_encodings
-};
 
-const encodings_t cbl_field_t::codeset_t::standard_internal = {
-  true, iconv_CP1252_e, "CP1252"
-};
-#define standard_internal cbl_field_t::codeset_t::standard_internal
+uint8_t
+cbl_field_t::codeset_t::stride() const {
+  const charmap_t *charmap = __gg__get_charmap(encoding);
+  return charmap->stride();
+}
+
+cbl_field_t::codeset_t::default_encodings_t
+cbl_field_t::codeset_t::default_encodings
+  {
+  encodings_t {   true, iconv_CP1252_e, "CP1252" } ,  // encodings_t alpha & national
+    {
+      encodings_t{ false, iconv_UTF_8_e, "UTF-8" } ,
+      encodings_t { true,  iconv_CP1252_e, "CP1252" }    }
+  };
+
+bool
+cobol_alpha_encoding( const char name[] ) {
+  auto encoding = __gg__encoding_iconv_descr(name);
+  if( encoding ) {
+    cbl_field_t::codeset_t::default_encodings.alpha = *encoding;
+    const charmap_t *charmap = __gg__get_charmap(encoding->type);
+    if( charmap->is_like_ebcdic() ) {
+      cobol_gcobol_feature_set(feature_internal_ebcdic_e);
+    } else {
+      // This handles multiple occurrences of -fexec-charset on a command line
+      cobol_gcobol_feature_set(feature_internal_ebcdic_e, false);
+    }
+    // Let's handle the ordinary situation of just setting alpha, and make
+    // sure national is at least as wide.  (We shouldn't have to set national
+    // first, just so we can set alpha.)
+    auto enc_n = cbl_field_t::codeset_t::default_encodings.national;
+    const charmap_t *charmap_n = __gg__get_charmap(enc_n.type);
+    if( charmap_n->stride() < charmap->stride() ) {
+      // Default national is narrower than *encoding, so boost default national
+      // up:
+      cobol_national_encoding(encoding->name);
+    }
+    return true;
+  }
+  return false;
+}
+
+bool
+cobol_national_encoding( const char name[] ) {
+  auto encoding = __gg__encoding_iconv_descr(name);
+  if( encoding ) {
+    cbl_field_t::codeset_t::default_encodings.national = *encoding;
+    return true;
+  }
+  return false;
+}
+
 
 cbl_field_t *
-new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) {
+new_temporary( enum cbl_field_type_t type, const char *initial, cbl_field_attr_t attr ) {
+  const bool is_signed = attr == signable_e;
   const bool force_unsigned = type == FldNumericBin5 && ! is_signed;
 
   if( ! initial && ! force_unsigned ) {
@@ -3712,9 +3783,10 @@ new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed )
     return temporaries.acquire(type, initial);
   }
   if( is_literal(type) ) {
+    const auto& defaults( cbl_field_t::codeset_t::default_encodings );
     auto field = temporaries.literal(strlen(initial), initial,
                                      type == FldLiteralA? quoted_e : none_e,
-                                     standard_internal.type);
+                                     defaults.source->type);
     return field;
   }
   cbl_field_t *field = new_temporary_impl(type, initial);
@@ -3773,9 +3845,21 @@ new_temporary_clone( const cbl_field_t *orig) {
   return parser_symbol_add2(field);
 }
 
+cbl_field_t *
+new_literal_float( const cbl_loc_t& loc, const char initial[] ) {
+  auto field = new_temporary_impl(FldFloat);
+  field->attr = constant_e;
+  std::string init(initial);
+  std::replace( init.begin(), init.end(), ',', '.' );
+  field->encode_numeric(init.c_str(), loc);
+  field->data.original(initial);
+
+  return parser_symbol_add2(field);
+}
+
 /*
  *  This set of ASCII-like encodings is incomplete and possibly wrong.  A
- *  complete definition would better supported with a Boolean in enccodings_t.
+ *  complete definition would better supported with a Boolean in encodings_t.
  *  If it returns false pessimistically, the only consequence is inefficiency:
  *  the string is processed by iconv(3).
  */
@@ -3799,7 +3883,7 @@ cbl_field_t::holds_ascii() const {
 bool
 cbl_field_t::is_ascii() const {
   return std::all_of( data.initial,
-                      data.initial + data.capacity,
+                      data.initial + data.capacity(),
                       isascii );
 }
 
@@ -3822,174 +3906,265 @@ cbl_field_t::is_ascii() const {
  * never reverts.
  */
 
-const char *
-cbl_field_t::internalize() {
-  /*  The purpose of this routine is to return a nul-terminated string which
-      is data.initial converted from the source-code characters to the
-      codeset.encoding characters.
-      
-      The contract between this routine and the routines that call it is that
-      for alphanumeric types, data.initial shall have the same number of
-      characters as will be needed to fill data.capacity.
-
-      Be aware that for PIC X(32) Z"foo", there are the characters "foo",
-      followed by a NUL, and then 28 spaces to fill it out.  It turns out that
-      iconv, given a character count of 32, converts all 32, including the
-      embedded NUL.  So, that case works even through strlen(initial) is
-      smaller than the length of initial, which is the same as capacity.
-  */
-
-  static const char *fromcode = codeset.source_encodings[0].name;
-  static const size_t noconv = size_t(-1);
-  static std::unordered_map<std::string, iconv_t> tocodes;
-
-  if( ! codeset.valid() ) {
-    dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
-    return data.initial;
-  }
+size_t
+cbl_field_t::source_code_check(const void *initial, size_t length)
+  {
+  size_t retval;
 
-  const char *tocode = __gg__encoding_iconv_name(codeset.encoding);
+  size_t iconv_retval;
 
-  std::string toname(tocode);
-  auto p = tocodes.find(toname);
-  if( p == tocodes.end() ) {
-    tocodes[toname] = iconv_open(tocode, fromcode);
-  }
-  iconv_t cd = tocodes[toname];
+  try_again:
 
-  if (cd == (iconv_t)-1) {
-    cbl_message(ParIconvE,
-                "failed %<iconv_open%> tocode = %qs fromcode = %qs",
-                tocode, fromcode);
-  }
+  cbl_encoding_t fromcode = codeset.default_encodings.source->type;
+  cbl_encoding_t tocode   = codeset.encoding;
+  std::string toname      = __gg__encoding_iconv_name(tocode);
 
-  if( fromcode == tocode || has_attr(hex_encoded_e) ) {
-    return data.initial;
+  __gg__iconverter(fromcode,
+                   tocode,
+                   initial,
+                   length,
+                   &retval,
+                   &iconv_retval);
+  if( iconv_retval )
+    {
+    // something went wrong on conversion.
+    if( codeset.default_encodings.next_source_encoding() )
+      {
+      goto try_again;
+      }
+    // None of our valid "from" encodings worked
+    dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__,
+           toname.c_str(), xstrerror(errno));
+    ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno));
+    }
+  return retval;
   }
 
-  if( data.capacity == 0 ) {
-    assert(0 == strlen(data.initial));
-    return data.initial;
-  }
-  if( holds_ascii() && is_ascii() ) {
-    if( type != FldNumericEdited ) {
-      if( ! data.initial_within_capacity() ) {
-        ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u",
-                    cbl_field_t::level_str(level), name, data.initial,
-                    strlen(data.initial), data.capacity );
+/*
+ * Look up the encoding handle for a given target based on the current source
+ * encoding.
+ */
+static iconv_t
+iconv_cd( cbl_encoding_t tgt ) {
+  struct iconv_key_t {
+    cbl_encoding_t to, from;
+    iconv_key_t() : to(no_encoding_e), from(no_encoding_e) {}
+    iconv_key_t( cbl_encoding_t to, cbl_encoding_t from ) : to(to), from(from) {}
+    bool operator<( const iconv_key_t& that ) const {
+      if( this->from == that.from ) {
+        return this->to < that.to;
       }
+      return this->from < that.from;
+    }
+  };
+  static std::map<iconv_key_t, iconv_t> cds;
+
+  auto src = cbl_field_t::codeset_t::default_encodings.current_source_encoding()->type;
+  assert(src);
+
+  iconv_key_t key(tgt, src);
+  auto p = cds.find(key);
+  iconv_t cd;
+    
+  if( p == cds.end() ) {
+    const char *fromcode = __gg__encoding_iconv_name(src);
+    const char *tocode   = __gg__encoding_iconv_name(tgt);    
+    gcc_assert(fromcode && tocode);
+    
+    if( (cd = iconv_open(tocode, fromcode)) == iconv_t(-1) ) {
+      return cd;
     }
-    return data.initial;
+    cds[key] = cd;
+  } else {
+    cd = p->second;
   }
-  assert(data.capacity > 0);
 
-  // The final 2 bytes of the output are "!\0".  It's a debugging sentinel.
-  size_t n;
-  size_t inbytesleft = data.capacity;
-  size_t outbytesleft = inbytesleft;
-  if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
-    inbytesleft = strlen(data.initial);
-  }
-  if( type == FldNumericEdited ) {
-    outbytesleft = inbytesleft;
-  }
-  const unsigned int in_len = inbytesleft;
+  return cd;
+}
 
-  char *in  = const_cast<char*>(data.initial);
-  char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
+/*
+ * Precondition: data.nbyte is the length of the output, and encoding is set.
+ * The caller supplies the input length because the string may include embedded
+ * NULs.  If conversion succeeds, return NULL.  If it fails, return a pointer
+ * to the failed character in data.orig.
+ */
+const char *
+cbl_field_t::encode( size_t srclen, cbl_loc_t loc ) {
+  gcc_assert(data.capacity());
+  gcc_assert(data.initial);
+  gcc_assert(data.original());
+  gcc_assert(codeset.valid());
+
+  if( this->type == FldLiteralN )
+    {
+    // DUBNER made this change; FldLiteralN doesn't get encoded.  This probably
+    // should have been nipped in the bud somewhere upstream.
+    return data.original();
+    }
 
-  assert(fromcode != tocode);
+  extern YYLTYPE yylloc;
+  const char *bad_boy = data.original();
+  if( 0 == loc.first_line )
+    loc = level == 0 ? yylloc : symbol_field_location(field_index(this));
 
   /*
-   * If we're currently assuming the source code is encoded according to the
-   * locale (the default), and there's an iconv failure, try once more using a
-   * different assumption, that the source code is encoded as CP1252.
-   *
-   * This heuristic means that some UTF-8 literals could be converted until a
-   * CP1252 byte is encountered.  We could be stricter about that.
-   *
-   * Also possible is a failure to avoid iconv with fromcode and tocode denote
-   * the same encoding but with different spellings, e.g. CP1252 and CP1252//.
+   * Hex-encoded means we don't convert.  data.initial should be long enough to
+   * hold the encoded string.
    */
-
-  do {
-    if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
-      if( fromcode == codeset.source_encodings[0].name ) {
-        codeset.source_encoding = &codeset.source_encodings[1];
-        fromcode = codeset.source_encoding->name;
-        tocodes.clear();
-        cd = tocodes[toname] = iconv_open(tocode, fromcode);
-        dbgmsg("%s: trying input encoding %s", __func__, fromcode);
-        if( fromcode == tocode ) return data.initial; // no conversion required.
-        n = noconv - 1; // try again
-      }
+  if( has_attr(hex_encoded_e) ) {
+    if( data.capacity() < srclen ) {
+      const char *inbuf = data.original() + data.capacity();
+      error_msg( loc, "VALUE %qs is too long to initialize %qs, "
+                      "discarded %ld bytes at %qs",
+                 data.original(), name, long(srclen - data.capacity()), inbuf);
+      srclen = data.capacity();
     }
-    if( n == 0 ) break;
-  } while( n != noconv );
-
-  if( n == noconv ) {
-    size_t i = in_len - inbytesleft;
-    yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)",
-           fromcode, data.initial + i, tocode, inbytesleft, in_len);
-    if( false ) return NULL;
-    return data.initial;
-  }
-
-  if( 0 < inbytesleft ) {
-    // data.capacity + inbytesleft is not correct if the remaining portion has
-    // multibyte characters.  But the fact remains that the VALUE is too big.
-    ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u",
-            cbl_field_t::level_str(level), name, data.initial,
-            data.capacity + inbytesleft, data.capacity );
-  }
-
-  // Replace data.initial only if iconv output differs.
-  if( 0 != memcmp(data.initial, output, out - output) ) {
-    assert(out <= output + data.capacity || type == FldNumericEdited);
-    dbgmsg("%s: converted '%.*s' to %s",
-                        __func__, data.capacity, data.initial, tocode);
-    struct localspace_t {
-      char space[4];
-      size_t len, erc;
-      explicit localspace_t( iconv_t cd ) {
-        static char input[1] = { 0x20 };
-        size_t inbytesleft2 = sizeof(input), outbytesleft2 = sizeof(space);
-        char *in2 = input, *out2 = space;
-
-        erc = iconv(cd, &in2, &inbytesleft2, &out2, &outbytesleft2);
-        len = out2 - space;
+    std::copy(data.original(), data.original() + srclen,
+              const_cast<char*>(data.initial));
+    return nullptr; // may return "truncated success" with error.
+  }
+  
+  auto figconst = cbl_figconst_of(data.original());
+  if( normal_value_e != figconst ) {
+    blank_initial( char_capacity(), figconst ); 
+    return nullptr;
+  }
+  
+  for( auto src = codeset.default_encodings.current_source_encoding();
+       src;
+       src = codeset.default_encodings.next_source_encoding() ) {
+    /*
+     * Get the iconv handle to convert the source-code encoding to the field's
+     * encoding.  If no such handle exists because iconv(3) can't do it
+     * (weird), try the next potential source-code encoding, which probably
+     * won't work either.
+     */
+    iconv_t cd = iconv_cd(codeset.encoding);
+    if( cd == iconv_t(-1) ) {
+      error_msg(loc, "cannot convert from %qs to %qs: %s",
+                cbl_encoding_str(src->type),
+                cbl_encoding_str(codeset.encoding),
+                xstrerror(errno));
+      continue;
+    }
+    
+    /*
+     * If conversion succeeds, return NULL.  
+     * If it fails, try the next potential encoding.
+     */
+    size_t inbytesleft = srclen, outbytesleft = data.capacity();
+    char *inbuf = const_cast<char*>(data.original());
+    char *outbuf = const_cast<char*>(data.initial);
+
+    size_t erc = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+
+    if( erc == size_t(-1) ) {
+      if( outbytesleft == 0 ) { // input doesn't fit
+        gcc_assert(0 < inbytesleft);
+        gcc_assert(0 < level);
+        if( loc.first_line == 0 )
+          loc = symbol_field_location(field_index(this));
+        if( type == FldNumericEdited ) {
+          // Tolerate trailing zeros for P-values
+          if( data.rdigits < 0 ) {
+            if( inbytesleft <= size_t(data.rdigits * -1) ) {
+             bool all_zeros = std::all_of(reinterpret_cast<const char*>(inbuf), 
+                                          data.original() + srclen,
+                                          [](char ch) {
+                                            return '0' == ch;
+                                          });
+              if( all_zeros ) return nullptr;
+            }
+          }
+        }
+        error_msg( loc, 
+                   "VALUE %qs is too long to initialize %qs, discarded %qs",
+                   data.original(), name, inbuf);
+        return nullptr; // success-ish
       }
-      bool valid() const { return 0 < len && erc != size_t(-1); }
-    } spc(cd);
-
-    if( ! spc.valid() ) {
-      dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__,
-             tocode, xstrerror(errno));
-      ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno));
-      return data.initial;
+      dbgmsg("'%c' of '%s'[%lu] could not be converted from %s to %s: %s",
+             *inbuf, data.original(), inbuf - data.original(), 
+             cbl_encoding_str(
+                   codeset.default_encodings.current_source_encoding()->type),
+             cbl_encoding_str(codeset.encoding),
+             xstrerror(errno) );
+      bad_boy = inbuf;
+      continue;
     }
-    assert( 0 < spc.len && spc.valid() );
 
-    if( is_literal(this) ) {
-      data.capacity = out - output; // trailing '!' will be overwritten
-    }
-    // Pad with trailing blanks, tacking a '!' on the end.
-    for( const char *eout = output + data.capacity;
-         out < eout;
-         out += spc.len ) {
-      memcpy(out, spc.space, spc.len);
+    if( inbytesleft == 0 ) {
+      if( data.all() ) {
+        for( size_t len = outbuf - data.initial;
+             outbuf + len <= data.initial + data.capacity();
+             outbuf += len ) {
+          std::copy( data.initial, data.initial + len, outbuf );
+        }
+      }
+      if( is_literal(this) ) {
+        data.capacity( outbuf - const_cast<char*>(data.initial) );
+      }
+      return nullptr; // success
     }
-    // Numeric literal strings may have leading zeros, making their length
-    // longer than their capacity.
-    out[0] = type == FldLiteralN? '\0' : '!';
-    assert(out[1] == '\0');
-    data.orig = data.initial;
-    data.initial = output;
-  } else {
-    free(output);
+    // else try again
   }
+  if( 0 == loc.first_line )
+    loc = level == 0 ? yylloc : symbol_field_location(field_index(this));
+  error_msg( loc, "%<%c%> of %qs could not be converted from %s to %s: %s",
+             *bad_boy, data.original(),
+             cbl_encoding_str(
+                    codeset.default_encodings.current_source_encoding()->type),
+             cbl_encoding_str(codeset.encoding),
+             xstrerror(errno) );
+  return data.original();
+}
+
+void
+cbl_field_t::set_capacity(size_t nchar) {
+  switch(this->type) {
+    case FldGroup:
+    case FldAlphanumeric:
+    case FldNumericDisplay:
+    case FldNumericEdited:
+    case FldAlphaEdited:
+    case FldLiteralA:
+    case FldInvalid:
+      if( codeset.valid() ) {
+        if( attr & hex_encoded_e ) {
+          data.capacity( capacity_cast(nchar) );
+        } else {
+          data.capacity( capacity_cast(nchar) * codeset.stride() );
+        }
+          
+      } else {
+        cbl_internal_error("%s: %s %s has invalid encoding",
+                           __func__, cbl_field_type_str(type), name);
+      }
+      break;
+    case FldNumericBinary:
+    case FldFloat:
+    case FldPacked:
+    case FldNumericBin5:
+    case FldLiteralN:
+    case FldClass:
+    case FldConditional:
+    case FldForward:
+    case FldIndex:
+    case FldSwitch:
+    case FldDisplay:
+    case FldPointer:
+      data.capacity( capacity_cast(nchar) );
+      break;
+  }
+}
 
-  return data.initial;
+void
+cbl_field_t::add_capacity(size_t nchar) {
+  data.add_capacity( nchar * codeset.stride() );
+}
+
+uint32_t
+cbl_field_t::char_capacity() const {
+  return data.capacity() / codeset.stride();
 }
 
 const char *
@@ -4153,14 +4328,14 @@ symbol_label_section_exists( size_t eval_label_index ) {
   /*  cppcheck warns that the following statement depends on the order of
       evaluation of side effects.  Since this isn't my code, and since I don't
       think the warning can be eliminated without rewriting it, I am just
-      supprressing it.
+      suppressing it.
       -- Bob Dubner, 2025-07-14 */
   // cppcheck-suppress unknownEvaluationOrder
   bool has_section = std::any_of( ++eval, symbols_end(),
                                [program = eval->program]( const auto& sym ) {
                                  if( program == sym.program && sym.type == SymLabel ) {
                                    const auto& L(sym.elem.label);
-                                  // true if the symbol is an explicit label.
+                                   // true if the symbol is an explicit label.
                                    return L.type == LblSection &&  L.name[0] != '_';
                                  }
                                  return false;
@@ -4869,7 +5044,7 @@ key_field_size( uint32_t sum, size_t ifield ) {
 uint32_t cbl_file_key_t::
 size() {
   if( leftmost != 0 ) {
-    return cbl_field_of(symbol_at(leftmost))->data.capacity;
+    return cbl_field_of(symbol_at(leftmost))->data.capacity();
   }
   return std::accumulate(fields, fields + nfield, 0, key_field_size);
 }
@@ -5152,3 +5327,140 @@ has_value( cbl_field_type_t type ) {
   dbgmsg( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type );
   return false;
 }
+
+bool
+validate_numeric_edited(cbl_field_t *field)
+  {
+  // returns TRUE when data.initial is compatible with PICTURE
+  bool retval = true;
+  if( field->type == FldNumericEdited
+      && field->data.original()
+      && !(field->attr & quoted_e)
+      && !(field->attr & FIGCONST_MASK) )
+    {
+    char *expanded = expand_picture(field->data.picture);
+
+    unsigned int decimal_point_local = __gg__decimal_point;
+    const char *pleft   = expanded;
+    const char *pright  = pleft + strlen(pleft);
+    const char *pmiddle = strchr(pleft, decimal_point_local);
+    if( !pmiddle )
+      {
+      pmiddle = pright;
+      }
+
+    // Count up digit placeholders to the left of the the decimal point:
+    int currencies_local = 0;
+    int signs      = 0;
+    int pldigits   = 0;
+    while( pleft < pmiddle )
+      {
+      unsigned int ch = (unsigned char)*pleft++;
+      if( symbol_currency(ch) )
+        {
+        // The very first currency symbol is not a digit placeholder:
+        pldigits += currencies_local;
+        currencies_local = 1;
+        }
+      else if(   ch == (unsigned char)ascii_plus
+              || ch == (unsigned char)ascii_minus )
+        {
+        // The very first sign is not a digit placeholder
+        pldigits += signs;
+        signs = 1;
+        }
+      else if(   ch == (unsigned char)ascii_Z
+              || ch == (unsigned char)ascii_z
+              || ch == (unsigned char)ascii_P
+              || ch == (unsigned char)ascii_p
+              || ch == (unsigned char)ascii_9
+              || ch == (unsigned char)ascii_asterisk )
+        {
+        // 9 Z and * are digit placeholders:
+        pldigits += 1;
+        }
+      }
+
+    // Count up digit placeholders to the left of the the decimal point:
+    int prdigits   = 0;
+    while( pmiddle < pright )
+      {
+      unsigned int ch = (unsigned char)*pmiddle++;
+      if(   ch == (unsigned char)ascii_Z
+         || ch == (unsigned char)ascii_z
+         || ch == (unsigned char)ascii_P
+         || ch == (unsigned char)ascii_p
+         || ch == (unsigned char)ascii_9
+         || ch == (unsigned char)ascii_asterisk )
+        {
+        // 9 Z and * are digit placeholders:
+        prdigits += 1;
+        }
+      }
+
+    // We have established the number of left and right digit placeholders.
+    // We now need to do the same for the number that has to go into those
+    // placeholders.
+
+    const char *dleft   = field->data.original();
+    const char *dright  = dleft + strlen(dleft);
+    const char *dmiddle = strchr(dleft, decimal_point);
+    if( !dmiddle )
+      {
+      dmiddle = dright;
+      }
+
+    // Skip over leading zeros
+    int dldigits = 0;
+    int nonzero  = 0;
+    while(dleft < dmiddle)
+      {
+      unsigned int ch = (unsigned char)*dleft++;
+      if( ch == (unsigned char)ascii_0 )
+        {
+        // We are looking at a zero.  Ignore leading zeroes if we haven't
+        // already seen a digit
+        dldigits += nonzero;
+        }
+      else if(ch >= (unsigned char)ascii_1 && ch <= (unsigned char)ascii_9 )
+        {
+        nonzero = 1;
+        dldigits += nonzero;
+        }
+      }
+
+    // Now count up the digits to the right of the decimal point:
+    int drdigits = 0;
+    // Adjust dright to skip trailing spaces
+    while( dright > dmiddle )
+      {
+      if( (unsigned char)*(dright-1) != (unsigned char)ascii_space )
+        break;
+      dright -= 1;
+      }
+    // Adjust dright to skip trailing zeroes
+    while( dright > dmiddle )
+      {
+      if( (unsigned char)*(dright-1) != (unsigned char)ascii_0 )
+        break;
+      dright -= 1;
+      }
+    // And count up the remaining characters:
+    while( dmiddle < dright )
+      {
+      unsigned int ch = (unsigned char)*dmiddle++;
+      if(ch >= (unsigned char)ascii_0 && ch <= (unsigned char)ascii_9 )
+        {
+        drdigits += 1;
+        }
+      }
+
+    // After all that, the acceptance test is disturbingly simple:
+    if( dldigits > pldigits || drdigits > prdigits )
+      {
+      retval = false;
+      }
+    free(expanded);
+    }
+  return retval;
+  }
index e0a7195822324088384f30e7deeded9224d37fe5..7a362564efe39c055012590d4604fb97611301a9 100644 (file)
@@ -121,6 +121,9 @@ static inline bool gcobol_feature_embiggen() {
     (cbl_gcobol_features & feature_embiggen_e);
 }
 
+bool cobol_alpha_encoding( const char name[] );
+bool cobol_national_encoding( const char name[] );
+
 enum cbl_division_t {
   identification_div_e,
   environment_div_e,
@@ -175,8 +178,6 @@ int cbl_figconst_tok( const char *value );
 enum cbl_figconst_t cbl_figconst_of( const char *value );
 const char * cbl_figconst_str( cbl_figconst_t fig );
 
-const char * consistent_encoding_check( const YYLTYPE& loc, const char input[] );
-
 class cbl_domain_elem_t {
   uint32_t length;
   const char *value;
@@ -186,16 +187,14 @@ class cbl_domain_elem_t {
   cbl_domain_elem_t()
     : length(0), value(NULL), is_numeric(false), all(false)
   {}
-  cbl_domain_elem_t( const YYLTYPE& loc,
-                     bool all,
+  cbl_domain_elem_t( bool all,
                      uint32_t length,
                      const char *value,
                      bool is_numeric = false )
     : length(length), value(value), is_numeric(is_numeric), all(all)
   {
     if( value && ! is_numeric ) {
-      auto s = consistent_encoding_check(loc, value);
-      if( s ) this->value = s;
+      this->value = value;
     }
   }
   const char *name() const { return value; }
@@ -206,12 +205,11 @@ struct cbl_domain_t {
   cbl_domain_elem_t first, last;
   cbl_domain_t() : first(), last(first)
   {}
-  cbl_domain_t( const YYLTYPE& loc,
-                bool all,
+  cbl_domain_t( bool all,
                 uint32_t length,
                 const char * value,
                 bool is_numeric = false )
-    : first(loc, all, length, value, is_numeric), last(first)
+    : first(all, length, value, is_numeric), last(first)
   {}
   cbl_domain_t( const cbl_domain_elem_t& a, const cbl_domain_elem_t& z )
     : first(a)
@@ -256,24 +254,38 @@ enum symbol_type_t {
 // the same.
 #define MAXIMUM_ALPHA_LENGTH 8192
 
-struct cbl_field_data_t {
+class cbl_field_data_t {
+  uint32_t nbyte;            // allocated space
+  struct orig_t {
+    bool all;
+    const char *data;
+    REAL_VALUE_TYPE value;
+    orig_t() : all(false), data(nullptr), value{} {}
+    explicit orig_t( const char *data, bool all = false )
+      : all(all), data(data), value{}
+    {}
+    explicit orig_t( REAL_VALUE_TYPE value )
+      : all(false), data(nullptr), value(value)
+    {}
+  } orig;
+public:
   uint32_t memsize;             // nonzero if larger subsequent redefining field
-  uint32_t capacity,            // allocated space
-           digits;              // magnitude: total digits (or characters)
+  uint32_t digits;              // magnitude: total digits (or characters)
   int32_t  rdigits;             // digits to the right
-  const char *orig, *initial, *picture;
-
-  enum etc_type_t { val88_e, upsi_e, value_e } etc_type;
+  const char *initial, *picture;
+  enum etc_type_t { no_value_e, val88_e, upsi_e, value_e } etc_type;
   const char *
   etc_type_str() const {
     switch(etc_type) {
     case val88_e: return "val88_e";
     case upsi_e: return "upsi_e";
-    case value_e: return "value_e";
+    case no_value_e: return "no value";
+    case  value_e: return  "value_e";
     }
     return "???";
   }
-  
+  bool etc_ok() const { return etc_type != no_value_e; }
+
   union etc_t {
     // "Domain" is an array representing the VALUE of CLASS or 88 type.
     struct val88_t {
@@ -288,44 +300,47 @@ struct cbl_field_data_t {
   } etc;
 
   cbl_field_data_t()
-    : memsize(0)
-    , capacity(0)
+    : nbyte(0)
+    , memsize(0)
     , digits(0)
     , rdigits(0)
-    , orig(0)
     , initial(0)
     , picture(0)
-    , etc_type(value_e)
+    , etc_type(no_value_e)
     , etc()
   {}
 
-  cbl_field_data_t( uint32_t memsize,  uint32_t capacity )
-    : memsize(memsize)
-    , capacity(capacity)
+  cbl_field_data_t( uint32_t memsize,  uint32_t nbyte )
+    : nbyte(nbyte)
+    , memsize(memsize)
     , digits(0)
     , rdigits(0)
-    , orig(0)
     , initial(0)
     , picture(0)
-    , etc_type(value_e)
+    , etc_type(no_value_e)
     , etc()
   {}
 
-  cbl_field_data_t( uint32_t memsize,  uint32_t capacity,
+  cbl_field_data_t( uint32_t memsize,  uint32_t nbyte,
                     uint32_t digits,  uint32_t rdigits,
                     const char *initial,
                     const char *picture = NULL ) 
-    : memsize(memsize)
-    , capacity(capacity)
+    : nbyte(nbyte)
+    , orig(initial)
+    , memsize(memsize)
     , digits(digits)
     , rdigits(rdigits)
-    , orig(0)
-    , initial(initial)
+    , initial(initial) // initial == data.orig.data
     , picture(picture)
-    , etc_type(value_e)
+    , etc_type(no_value_e)
     , etc()
   {}
 
+  inline uint32_t capacity( uint32_t size ) { return nbyte = size; }
+  inline uint32_t capacity() const          { return nbyte; }
+  
+  inline uint32_t add_capacity( uint32_t size ) { return nbyte += size; }
+
   cbl_field_data_t( const cbl_field_data_t& that ) {
     copy_self(that);
   }
@@ -369,8 +384,50 @@ struct cbl_field_data_t {
     return etc.value = build_int_cst_type(integer_type_node, i);
   } 
 
+  tree_code value_type() const {
+    gcc_assert(etc_type == value_e);
+    tree_node *node = TREE_TYPE(etc.value);
+    tree_code  code = TREE_CODE(node);
+    return code;
+  }
+  bool value_is_float() const {
+    gcc_assert(etc_type == value_e);
+    tree_node *node = TREE_TYPE(etc.value);
+    return SCALAR_FLOAT_TYPE_P(node);
+  }
+  bool value_is_fixed() const {
+    gcc_assert(etc_type == value_e);
+    tree_node *node = TREE_TYPE(etc.value);
+    return FIXED_POINT_TYPE_P(node);
+  }
+  bool value_is_integer() const {
+    gcc_assert(etc_type == value_e);
+    tree_node *node = TREE_TYPE(etc.value);
+    return INTEGRAL_TYPE_P(node);
+  }
+
+  // verify is numeric and zero fraction 
+  std::pair<int64_t, bool> int64_of() const {
+    if( etc_type == value_e ) {
+      auto r = TREE_REAL_CST_PTR( value_of() );
+      auto n = real_to_integer(r);
+      REAL_VALUE_TYPE r2;
+      real_from_integer (&r2, VOIDmode, n, SIGNED);
+      // If the orginal value r is equal to r2, derived from its integer
+      // part n, then the fractional component is zero.
+      if( real_identical (r, &r2) ) {
+        return std::make_pair( int64_t(n), true );
+      }
+    }
+    return std::make_pair(int64_t(0), false);
+  }
+
+  bool has_initial_value() const {
+    return orig.data || etc_type != no_value_e;
+  }
+
   void set_real_from_capacity( REAL_VALUE_TYPE *r ) const {
-    real_from_integer (r, VOIDmode, capacity, SIGNED);
+    real_from_integer (r, VOIDmode, nbyte, SIGNED);
   }
 
   time_now_f time_func;
@@ -386,8 +443,8 @@ struct cbl_field_data_t {
   int32_t ldigits() const { return std::max(int(digits), int(digits - rdigits)); }
 
   cbl_field_data_t& valify() {
-    assert(initial);
-    std::string input(initial);
+    assert(orig.data);
+    std::string input(orig.data);
     if( decimal_is_comma() ) {
       std::replace(input.begin(), input.end(), ',', '.');
     }
@@ -409,48 +466,40 @@ struct cbl_field_data_t {
   }
   cbl_field_data_t& valify( const char *input ) {
     assert(input);
-    initial = input;
-    capacity = strlen(initial);
+    original(input);
     return valify();
   }
 
-  // If initial (of Numeric Edited) has any length but capacity, adjust it.  
-  bool manhandle_initial() {
-    assert(capacity > 0);
-    assert(initial != nullptr);
-    if( capacity < strlen(initial) ) {
-      char *p = const_cast<char*>(initial);
-      p[capacity] = '\0';
-      return true;
-    }
-    if( strlen(initial) < capacity ) {
-      auto tgt = reinterpret_cast<char *>( xmalloc(capacity + 1) );
-      auto pend = tgt + capacity;
-      auto p = std::copy(initial, initial + strlen(initial), tgt);
-      std::fill(p, pend, 0x20);
-      p = pend - 1;
-      *p = '\0';
-      initial = tgt;
-    }
-    return false;
+  bool all() const { return orig.data? orig.all : false; }
+  bool is_alpha_edited() const;
+  const char *original() const { return orig.data? orig.data : nullptr; }
+  const REAL_VALUE_TYPE& original_numeric() const { return orig.value; }
+
+  // Set the original string, and set the capacity to its length if nothing
+  // else already did.  This function is used only for VALUE numeric literal,
+  // to preserve the VALUE clause until the field is fully defined.
+  const char *original( const char *orig, bool all = false) {
+    if( nbyte == 0 ) nbyte = strlen(orig);
+    this->orig = orig_t( orig, all );
+    return this->orig.data;
   }
-  bool initial_within_capacity() const {
-    return initial[capacity] == '\0'
-      ||   initial[capacity] == '!';
+  // Set the computed cce value.  Do not impute capacity. 
+  void original( REAL_VALUE_TYPE value ) {
+    orig = orig_t(value);
   }
-  const char *original() const { return orig? orig : initial; }
-
  protected:
   cbl_field_data_t& copy_self( const cbl_field_data_t& that ) {
     memsize = that.memsize;
-    capacity = that.capacity;
+    nbyte = that.nbyte;
     digits = that.digits;
     rdigits = that.rdigits;
+    orig = that.orig;
     initial = that.initial;
     picture = that.picture;
     etc_type = that.etc_type;
 
     switch(etc_type) {
+      case no_value_e:
       case value_e:
         etc.value = that.etc.value;
         break;
@@ -552,6 +601,10 @@ struct cbl_subtable_t {
   size_t offset, isym;
 };
 
+const encodings_t *
+             __gg__encoding_iconv_descr( cbl_encoding_t encoding );
+const encodings_t *
+             __gg__encoding_iconv_descr( const char name[] );
 const char * __gg__encoding_iconv_name( cbl_encoding_t encoding );
 bool         __gg__encoding_iconv_valid( cbl_encoding_t encoding );
 
@@ -584,7 +637,37 @@ struct cbl_field_t {
   uint32_t level;
   cbl_occurs_t occurs;
   struct codeset_t {
-    static const encodings_t standard_internal, source_encodings[2], *source_encoding;
+    struct default_encodings_t {
+      friend bool cobol_alpha_encoding( const char name[] );
+      friend bool cobol_national_encoding( const char name[] );
+      encodings_t alpha, national;
+      const encodings_t possible_sources[2] = {}, *source = 0;
+      
+      default_encodings_t( const encodings_t& alpha,
+                           const std::vector<encodings_t>& possible_sources )
+        : alpha(alpha)
+        , national(alpha)
+      {
+        std::copy(possible_sources.begin(),
+                  possible_sources.end(),
+                  const_cast<encodings_t*>(this->possible_sources));
+        source = this->possible_sources;
+      }
+      const encodings_t *next_source_encoding() {
+        if( ++source < possible_sources + COUNT_OF(possible_sources) ) {
+          return source;
+        }
+        return nullptr;
+      }
+      const encodings_t *current_source_encoding() const {
+        return
+          source < possible_sources + COUNT_OF(possible_sources) ?
+          source : nullptr;
+      }
+
+    };
+    static default_encodings_t default_encodings;
+
     cbl_encoding_t encoding;
     size_t alphabet;  // unlikely
     explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e,
@@ -592,10 +675,15 @@ struct cbl_field_t {
       : encoding(encoding), alphabet(alphabet)
     {}
     bool valid() const {
-      return
-        (alphabet == 0 && encoding != custom_encoding_e)
-        ||
-        (alphabet != 0 && encoding == custom_encoding_e);
+      switch(encoding) {
+      case no_encoding_e:
+        return false;
+      case custom_encoding_e:
+        return alphabet != 0;
+      default:
+        break;
+      }
+      return alphabet == 0;
     }
     bool consistent() const {
       return valid() && ( encoding == current_encoding('A')
@@ -604,12 +692,20 @@ struct cbl_field_t {
                           ||
                           encoding == UTF8_e );
     }
+    // set_explicit overrides an encoding inferred via e.g. PIC 999.
+    bool set_explicit( cbl_encoding_t encoding ) {
+      assert(valid_encoding(encoding));
+      this->encoding = encoding;
+      this->alphabet = 0;
+      return valid();
+    }
+    bool set_per_source() {
+      return set_explicit( default_encodings.source->type );
+    }
     bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) {
       assert(valid_encoding(encoding));
       if( ! valid() ) { // setting first time
-        this->encoding = encoding;
-        this->alphabet = alphabet;
-        return valid();
+        return set_explicit(encoding);
       }
       return this->encoding == encoding && this->alphabet == alphabet;
     }
@@ -643,7 +739,8 @@ struct cbl_field_t {
     cbl_encoding_t set() const {
       return valid()? encoding : cbl_encoding_t(-1);
     }
-    const char *name() const {
+    uint8_t stride() const;
+    inline const char *name() const {
       return valid()? __gg__encoding_iconv_name(encoding) : "nocoding";
     }
   } codeset;
@@ -680,6 +777,19 @@ struct cbl_field_t {
     strcpy(this->name, name);
   }
 
+  cbl_field_t( cbl_field_type_t type, uint64_t attr,
+               const cbl_field_data_t& data,
+               uint32_t level, const cbl_name_t name, 
+               const cbl_field_t::codeset_t& codeset )
+    : offset(0), type(type), usage(FldInvalid), attr(attr)
+    , parent(0), our_index(0), level(level), codeset(codeset)
+    , line(0), name(""), file(0), data(data)
+    , var_decl_node(nullptr), data_decl_node(nullptr)
+  {
+    gcc_assert(strlen(name) < sizeof this->name);
+    strcpy(this->name, name);
+  }
+
   cbl_field_t( cbl_field_type_t type, uint32_t level, int line, uint64_t attr = 0 )
     : offset(0), type(type), usage(FldInvalid), attr(attr)
     , parent(0), our_index(0), level(level)
@@ -702,7 +812,7 @@ struct cbl_field_t {
   }
 
   bool is_valid() const {
-    return data.capacity > 0
+    return data.capacity() > 0
       || level == 88
       || level == 66
       || type == FldClass
@@ -728,7 +838,7 @@ struct cbl_field_t {
   }
 
   bool reasonable_capacity() const {
-    return data.capacity <= MAX_FIXED_POINT_DIGITS;
+    return data.capacity() <= MAX_FIXED_POINT_DIGITS * codeset.stride();
   }
 
   cbl_field_t& same_as( const cbl_field_t& that, bool is_typedef ) {
@@ -746,7 +856,7 @@ struct cbl_field_t {
     return *this;
   }
 
-  void report_invalid_initial_value(const YYLTYPE& loc) const;
+  bool report_invalid_initial_value(const YYLTYPE& loc) const;
 
   bool is_ascii() const;
   bool is_integer() const { return is_numeric(type) && data.rdigits == 0; }
@@ -755,6 +865,11 @@ struct cbl_field_t {
     return type == FldNumericBinary || type == FldNumericBin5;
   }
 
+  bool is_numeric_constant() const {
+    return type == FldLiteralN
+      || (type == FldFloat && has_attr(constant_e));
+  }
+
   HOST_WIDE_INT as_integer() const {
     return real_to_integer( TREE_REAL_CST_PTR (data.value_of()) );
   }
@@ -764,7 +879,7 @@ struct cbl_field_t {
 
     type = FldNumericBin5;
     attr |= embiggened_e;
-    data.capacity = eight;
+    data.capacity(eight);
     data.digits = 0;
   }
 
@@ -783,7 +898,15 @@ struct cbl_field_t {
 
   bool has_subordinate( const cbl_field_t *that ) const;
 
-  const char * internalize();
+  uint32_t char_capacity() const;
+  void set_capacity(size_t cap);
+  void add_capacity(size_t cap);
+  void set_initial( const cbl_loc_t& loc );
+  void set_initial( size_t nchar, const cbl_loc_t& loc = cbl_loc_t() );
+  size_t source_code_check(const void *initial, size_t length);
+  const char * encode( size_t, cbl_loc_t loc = cbl_loc_t());
+  void encode_numeric( const char input[], cbl_loc_t loc,
+                       const REAL_VALUE_TYPE& rvt = {});
   const char *value_str() const;
 
   bool is_key_name() const { return has_attr(record_key_e); }
@@ -792,7 +915,7 @@ struct cbl_field_t {
     return data.digits?
       long(data.digits) - data.rdigits
       :
-      data.capacity;
+      data.capacity();
   }
   uint32_t size() const; // table capacity or capacity
 
@@ -804,6 +927,7 @@ struct cbl_field_t {
   inline const char * level_str() const {
     return level_str(level);
   }
+  void blank_initial( size_t len, cbl_figconst_t figconst = normal_value_e );
 };
 
 const cbl_field_t * cbl_figconst_field_of( const char *value );
@@ -1374,13 +1498,19 @@ struct label_cmp_lessthan {
 
 size_t field_index( const cbl_field_t *f );
 
+cbl_field_t * new_literal_float( const cbl_loc_t& loc, const char initial[] );
+
 cbl_field_t * new_temporary( enum cbl_field_type_t type,
                              const char initial[] = NULL,
-                             bool attr = false );
+                             cbl_field_attr_t = none_e );
 cbl_field_t * new_temporary_like( cbl_field_t skel );
 cbl_field_t * new_temporary_clone( const cbl_field_t *orig);
 cbl_field_t * keep_temporary( cbl_field_type_t type );
 
+cbl_field_t * new_literal_2( uint32_t len, const char initial[],
+                             cbl_field_attr_t attr,
+                             cbl_encoding_t encoding = ASCII_e );
+
 cbl_field_t * new_literal( uint32_t len, const char initial[],
                            cbl_field_attr_t attr,
                            cbl_encoding_t encoding = ASCII_e );
@@ -1391,19 +1521,34 @@ new_literal( uint32_t len, const char initial[] ) {
 }
 
 void symbol_temporaries_free();
+void symbol_temporary_location( const cbl_field_t *field,
+                                const cbl_loc_t& loc);
+cbl_loc_t symbol_temporary_location( const cbl_field_t *field );
 
 class temporaries_t {
-  friend void symbol_temporaries_free();
-  struct literal_an {
+  friend void symbol_temporaries_free();    
+  friend void symbol_temporary_location( const cbl_field_t *field,
+                                         const cbl_loc_t& loc);
+  friend cbl_loc_t symbol_temporary_location( const cbl_field_t *field );
+
+  class literal_an {
     bool is_quoted, is_verbatim; // verbatim: don't use codeset
+   public:
     std::string value;
     literal_an() : is_quoted(false), is_verbatim(false), value("???") {}
     literal_an( const char value[], bool is_quoted, bool is_verbatim = false )
       : is_quoted(is_quoted), is_verbatim(is_verbatim), value(value) {}
+    literal_an( size_t len, const char value[] )
+      : is_quoted(true), is_verbatim(false), value(value, len)
+    {
+      gcc_assert(0 < len);
+      gcc_assert(value[len-1] == '\0');
+      gcc_assert(this->value.back() == '\0');
+    }
     literal_an( const literal_an& that )
-      : is_quoted(that.is_quoted),
-        is_verbatim(that.is_verbatim),
-        value(that.value)
+      : is_quoted(that.is_quoted)
+      , is_verbatim(that.is_verbatim)
+      , value(that.value)
     {}
     literal_an& operator=( const literal_an& that ) {
       is_quoted = that.is_quoted;
@@ -1413,16 +1558,21 @@ class temporaries_t {
     }
     bool operator<( const literal_an& that ) const {
       if( value == that.value ) { // alpha before numeric
-        if( is_quoted == that.is_quoted ) { // verbatim before not
-          return (is_verbatim? 0 : 1)  < (that.is_verbatim? 0 : 1);
+        if( is_quoted == that.is_quoted ) { // non-verbatim first
+          return that.is_verbatim;
         }
-        return (is_quoted? 0 : 1)  < (that.is_quoted? 0 : 1);
+        return that.is_quoted; // unquoted first
       }
       return value < that.value;
     }
+    bool terminated() const {
+      // Z strings include the NUL terminator.   
+      return !is_verbatim && is_quoted && !value.empty() && '\0' == value.back();
+    }
   };
 
   std::map<literal_an, cbl_field_t *> literals;
+  std::map<const cbl_field_t*, cbl_loc_t> locs;
   typedef std::set<cbl_field_t *> fieldset_t;
   typedef std::map<cbl_field_type_t, fieldset_t> fieldmap_t;
   fieldmap_t used, freed;
@@ -2039,6 +2189,8 @@ struct symbol_elem_t {
   }
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winvalid-offsetof"
 # define offsetof(TYPE, MEMBER)  __builtin_offsetof (TYPE, MEMBER)
 
 static inline symbol_elem_t *
@@ -2110,6 +2262,7 @@ symbol_elem_of( const cbl_field_t *field ) {
     // cppcheck-suppress cstyleCast
     reinterpret_cast<const symbol_elem_t *>((const char*)field - n);
 }
+#pragma GCC diagnostic pop
 
 symbol_elem_t * symbols_begin( size_t first = 0 );
 symbol_elem_t * symbols_end(void);
@@ -2307,7 +2460,7 @@ struct cbl_until_addresses_t {
 
 size_t symbol_index(); // nth after first program symbol
 size_t symbol_index( const symbol_elem_t *e );
-size_t symbol_unique_index( const struct symbol_elem_t *e );
+uint64_t symbol_unique_index( const struct symbol_elem_t *e );
 
 struct symbol_elem_t * symbol_at( size_t index );
 
@@ -2899,6 +3052,8 @@ const char * symbol_type_str( enum symbol_type_t type );
 const char * cbl_field_type_str( enum cbl_field_type_t type );
 const char * cbl_logop_str( enum logop_t op );
 
+const char * cbl_field_type_name( enum cbl_field_type_t type ); // for messages
+
 static inline const char *
 refer_type_str( const cbl_refer_t *r ) {
   return r && r->field? cbl_field_type_str(r->field->type) : "(none)";
@@ -2923,4 +3078,6 @@ size_t count_characters(const char *in, size_t length);
 
 void current_enabled_ecs( tree ena );
 
+bool validate_numeric_edited(cbl_field_t *field);
+
 #endif
index 8ce64728b238b047cb655cbfde05359523ddcf01..45bcc78d3276e681ab59d4a3feab032775603b34 100644 (file)
@@ -1,5 +1,5 @@
 // generated by /home/jklowden/projects/3rd/gcc/parser/gcc/cobol/token_names.h.gen cobol/parse.h
-// Wed Nov 26 11:57:23 EST 2025
+// Sun Jan 11 18:01:04 EST 2026
 tokens = {
        { "identification", IDENTIFICATION_DIV }, // 258
        { "environment", ENVIRONMENT_DIV }, // 259
index 3a01e867aa7385402b39737dae6dc3389edc197c..f27db2ad6e2eb8dab254089639a7eba1879419a6 100644 (file)
  * header files.
  */
 
-#include <cobol-system.h>
+#include "cobol-system.h"
 #include <coretypes.h>
 #include <tree.h>
+#include <fold-const.h>
 #undef yy_flex_debug
 
 #include <langinfo.h>
@@ -48,8 +49,8 @@
 #include <backtrace.h>
 #include <diagnostic.h>
 #include <opts.h>
-#include "util.h"
 
+#include "util.h"
 #include "cbldiag.h"
 #include "cdfval.h"
 #include "lexio.h"
@@ -62,6 +63,7 @@
 #include "genapi.h"
 #include "genutil.h"
 #include "../../libgcobol/charmaps.h"
+#include "../../libgcobol/valconv.h"
 
 #pragma GCC diagnostic ignored "-Wunused-result"
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -269,32 +271,31 @@ cdf_literalize( const std::string& name, const cdfval_t& value ) {
     if( value.is_numeric() ) {
       auto initial = xasprintf("%ld", (long)value.as_number());
       auto len = strlen(initial);
-      cbl_field_data_t data(len, len);
-      data.initial = initial;
+      cbl_field_data_t data(len, len, len,0, initial); // digits == len, no rdigits
       data.valify();
       field = cbl_field_t{ FldLiteralN, constant_e, data, 0, name.c_str()};
     } else {
       auto len = strlen(value.string);
       cbl_field_data_t data(len, len);
-      data.initial = xstrdup(value.string);
+      data.original(xstrdup(value.string));
       field = cbl_field_t{ FldLiteralA, constant_e, data, 0, name.c_str() };
       field.set_attr(quoted_e);
     }
-    field.codeset.set();
 
+    field.codeset.set();
     return field;
 }
 
-const std::list<cbl_field_t> 
+const std::list<cbl_field_t>
 cdf_literalize() {
   std::list<cbl_field_t> fields;
   auto dict = cdf_dictionary();
-  
+
   for( auto elem : dict ) {
     std::string name(elem.first);
     const cdfval_t& value(elem.second);
-    
-    fields.push_back(cdf_literalize(name, value));    
+
+    fields.push_back(cdf_literalize(name, value));
   }
   return fields;
 }
@@ -371,6 +372,53 @@ cbl_field_type_str( enum cbl_field_type_t type )
   return "???";
 }
 
+const char *
+cbl_field_type_name( enum cbl_field_type_t type )
+{
+  switch(type) {
+  case FldDisplay:
+    return "DISPLAY";
+  case FldInvalid:
+    return ""; // Invalid";
+  case FldGroup:
+    return "GROUP";
+  case FldAlphanumeric:
+    return "ALPHANUMERIC";
+  case FldNumericBinary:
+    return "NUMERIC-BINARY";
+  case FldFloat:
+    return "FLOAT";
+  case FldNumericBin5:
+    return "COMPUTATIONAL-5";
+  case FldPacked:
+    return "PACKED-DECIMAL";
+  case FldNumericDisplay:
+    return "NUMERIC-DISPLAY";
+  case FldNumericEdited:
+    return "NUMERIC-EDITED";
+  case FldAlphaEdited:
+    return "ALPHANUMERIC-EDITED";
+  case FldLiteralA:
+    return "ALPHANUMERIC LITERAL";
+  case FldLiteralN:
+    return "NUMERIC LITERAL";
+  case FldClass:
+    return "CLASS";
+  case FldConditional:
+    return "CONDITIONAL";
+  case FldForward:
+    return "FORWARD";
+  case FldIndex:
+    return "INDEX";
+  case FldSwitch:
+    return "SWITCH";
+  case FldPointer:
+    return "POINTER";
+ }
+  cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
+  return "???";
+}
+
 const char *
 cbl_logop_str( enum logop_t op )
 {
@@ -404,7 +452,7 @@ determine_intermediate_type( const cbl_refer_t& aref,
   if( aref.field->type == FldFloat || bref.field->type == FldFloat )
     {
     output.type = FldFloat;
-    output.data.capacity = 16;
+    output.data.capacity(16);
     output.attr = (intermediate_e );
     }
   else if(   op == '*'
@@ -412,13 +460,13 @@ determine_intermediate_type( const cbl_refer_t& aref,
                                                       > MAX_FIXED_POINT_DIGITS)
     {
     output.type = FldFloat;
-    output.data.capacity = 16;
+    output.data.capacity(16);
     output.attr = (intermediate_e );
     }
   else
     {
     output.type = FldNumericBin5;
-    output.data.capacity = 16;
+    output.data.capacity(16);
     output.data.digits   = MAX_FIXED_POINT_DIGITS;
     output.attr = (intermediate_e | signable_e );
     }
@@ -449,20 +497,23 @@ const char *numed_message;
 extern int yydebug, yy_flex_debug;
 
 bool
-is_alpha_edited( const char picture[] ) {
+cbl_field_data_t::is_alpha_edited() const {
   static const char valid[] = "abxABX90/(),.";
   assert(picture);
-
-  for( const char *p = picture; *p != '\0'; p++ ) {
-    if( strchr(valid, *p) ) continue;
-    if( ISDIGIT(*p) ) continue;
-    if( symbol_decimal_point() == *p ) continue;
-    if( symbol_currency(*p) ) continue;
-
-    if( yydebug ) {
-      dbgmsg( "%s: bad character '%c' at %.*s<-- in '%s'",
-             __func__, *p, int(p - picture) + 1, picture, picture );
-    }
+  auto ep = picture + strlen(picture);
+
+  // Find first character that is not part of an alpha-edited PICTURE.
+  auto p = std::find_if( picture, ep,
+                         []( char ch ) {
+                           if( strchr(valid, ch) ) return false;
+                           if( ISDIGIT(ch) ) return false;
+                           if( symbol_decimal_point() == ch ) return false;
+                           if( symbol_currency(ch) ) return false;
+                           return true;
+                         } );
+  if( p != ep ) {
+    dbgmsg( "%s: bad character '%c' at %.*s<-- in '%s'",
+            __func__, *p, int(p - picture) + 1, picture, picture );
     return false;
   }
   return true;
@@ -820,7 +871,7 @@ symbol_field_type_update( cbl_field_t *field,
     case FldPointer:
       // set the type
       field->type = candidate;
-      if( field->data.capacity == 0 ) {
+      if( field->data.capacity() == 0 ) {
         static const cbl_field_data_t data = {0, 8, 0, 0, NULL};
         field->data = data;
         field->attr &= ~size_t(signable_e);
@@ -927,10 +978,7 @@ symbol_field_type_update( cbl_field_t *field,
     case FldNumericDisplay:
     case FldAlphaEdited:
     case FldNumericEdited:
-      {
-      bool retval = field->codeset.set();
-      return retval;
-      }
+      return field->codeset.set();
     default:
       break;
     }
@@ -1002,7 +1050,7 @@ redefine_field( cbl_field_t *field ) {
     field->data.initial = NULL;
   }
 
-  if( field->data.capacity == 0 ) field->data = primary->data;
+  if( field->data.capacity() == 0 ) field->data = primary->data;
 
   if( is_numeric(field->type) && field->usage == FldDisplay ) {
     fOK = symbol_field_type_update(field, FldNumericDisplay, false);
@@ -1011,24 +1059,820 @@ redefine_field( cbl_field_t *field ) {
   return fOK;
 }
 
+static
+FIXED_WIDE_INT(128)
+dirty_to_binary(const char  *instring,
+                uint32_t    &capacity,
+                uint32_t    &digits,
+                int32_t     &rdigits,
+                uint64_t    &attr)
+  {
+  digits = 0;
+  rdigits = 0;
+  attr = 0;
+
+  FIXED_WIDE_INT(128) value = 0;
+
+  // We need to convert data.initial to an FIXED_WIDE_INT(128) value
+  const char *p = instring;
+  int sign = 1;
+  bool ignore_zeroes = true;
+  if( *p == '-' )
+    {
+    attr |= signable_e;
+    sign = -1;
+    p += 1;
+    }
+  else if( *p == '+' )
+    {
+    // We set it signable so that the instruction DISPLAY +1
+    // actually outputs "+1"
+    attr |= signable_e;
+    p += 1;
+    }
+
+  //  We need to be able to handle
+  //  123
+  //  123.456
+  //  123E<exp>
+  //  123.456E<exp>
+  //  where <exp> can be N, +N and -N
+  //
+
+  int rdigit_delta = 0;
+  int exponent = 0;
+  const char *exp = strchr(p, 'E');
+  if( !exp )
+    {
+    exp = strchr(p, 'e');
+    }
+  if(exp)
+    {
+    exponent = atoi(exp+1);
+    }
+
+  // We can now calculate the value, and the number of digits and rdigits.
+
+  // We trim off leading zeroes before the decimal point, and trailing zeroes
+  // after a decimal point.
+
+  const char *pend = exp;
+  if( !exp )
+    {
+    pend = instring + strlen(instring);
+    }
+
+  const char *pdecimal = strchr(instring, symbol_decimal_point());
+  if( pdecimal )
+    {
+    while( pend > instring && *(pend-1) == '0' )
+      {
+      pend -= 1;
+      }
+    }
+
+  while(p < pend)
+    {
+    char ch = *p++;
+    if( ch == symbol_decimal_point() )
+      {
+      rdigit_delta = 1;
+      ignore_zeroes = false;
+      continue;
+      }
+    if( ignore_zeroes && ch == '0' )
+      {
+      continue;
+      }
+    ignore_zeroes = false;
+    if( ch < '0' || ch > '9' )
+      {
+      break;
+      }
+    digits += 1;
+    rdigits += rdigit_delta;
+    value *= 10;
+    value += ch - '0';
+    }
+
+  if( exponent < 0 )
+    {
+    rdigits += -exponent;
+    }
+  else
+    {
+    while(exponent--)
+      {
+      if(rdigits)
+        {
+        rdigits -= 1;
+        }
+      else
+        {
+        digits += 1;
+        value *= 10;
+        }
+      }
+    }
+
+  if( (int32_t)digits < rdigits )
+    {
+    digits = rdigits;
+    }
+
+  // We now need to calculate the capacity.
+
+  unsigned int min_prec = wi::min_precision(value, UNSIGNED);
+  if( min_prec > 64 )
+    {
+    // Bytes 15 through 8 are non-zero
+    capacity = 16;
+    }
+  else if( min_prec > 32 )
+    {
+    // Bytes 7 through 4 are non-zero
+    capacity = 8;
+    }
+  else if( min_prec > 16 )
+    {
+    // Bytes 3 and 2
+    capacity = 4;
+    }
+  else if( min_prec > 8 )
+    {
+    // Byte 1 is non-zero
+    capacity = 2;
+    }
+  else
+    {
+    // The value is zero through 0xFF
+    capacity = 1;
+    }
+
+  value *= sign;
+
+  // One last adjustment.  The number is signable, so the binary value
+  // is going to be treated as twos complement.  That means that the highest
+  // bit has to be 1 for negative signable numbers, and 0 for positive.  If
+  // necessary, adjust capacity up by one byte so that the variable fits:
+
+  if( capacity < 16 && (attr & signable_e) )
+    {
+    FIXED_WIDE_INT(128) mask
+      = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1);
+    if( wi::neg_p (value) && (value & mask) == 0 )
+      {
+      capacity *= 2;
+      }
+    else if( !wi::neg_p (value) && (value & mask) != 0 )
+      {
+      capacity *= 2;
+      }
+    }
+
+  return value;
+  }
+
+static void
+digits_from_int128( char                *ach,
+                    cbl_field_t         *field,
+                    uint32_t             desired_digits,
+                    FIXED_WIDE_INT(128)  value128, // cppcheck-suppress unknownMacro
+                    int32_t              rdigits)
+  {
+  if( value128 < 0 )
+    {
+    value128 = -value128;
+    }
+
+  // 'rdigits' are the number of rdigits in value128.
+
+  int scaled_rdigits = get_scaled_rdigits(field);
+
+  int i = field->data.rdigits;
+  while( i<0 )
+    {
+    value128 = value128/10;
+    i += 1;
+    }
+
+  // We take the digits of value128, and put them into ach.  We line up
+  // the rdigits, and we truncate the string after desired_digits
+  while(rdigits < scaled_rdigits)
+    {
+    value128 *= 10;
+    rdigits += 1;
+    }
+  while(rdigits > scaled_rdigits)
+    {
+    value128 = value128 / 10;
+    rdigits -= 1;
+    }
+  char conv[128];
+  print_dec (value128, conv, SIGNED);
+  size_t len = strlen(conv);
+
+  if( len<desired_digits )
+    {
+    memset(ach, ascii_0, desired_digits - len);
+    strcpy(ach+desired_digits - len, conv);
+    }
+  else
+    {
+    strcpy(ach, conv + len-desired_digits);
+    }
+  }
+
+static
+void
+binary_initial( char *retval,
+                cbl_field_t *field,
+                FIXED_WIDE_INT(128) value,
+                int drdigits)
+  {
+  // This routine returns an xmalloced buffer designed to replace the
+  // data.initial member of the incoming field
+
+  int scaled_rdigits = get_scaled_rdigits(field);
+
+  int i = field->data.rdigits;
+  while( i<0 )
+    {
+    value = value/10;
+    i += 1;
+    }
+
+  // We take the digits of value, and put them into ach.  We line up
+  // the rdigits, and we truncate the string after desired_digits
+  while(drdigits < scaled_rdigits)
+    {
+    value *= 10;
+    drdigits += 1;
+    }
+  while(drdigits > scaled_rdigits)
+    {
+    value = value / 10;
+    drdigits -= 1;
+    }
+
+  switch(field->data.capacity())
+    {
+    tree type;
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+    case 16:
+      type = build_nonstandard_integer_type ( field->data.capacity()
+                                              * BITS_PER_UNIT, 0);
+      native_encode_wide_int (type, value, PTRCAST(unsigned char, retval),
+                              field->data.capacity());
+      break;
+    default:
+      fprintf(stderr,
+              "Trouble in binary_initial at %s() %s:%d\n",
+              __func__,
+              __FILE__,
+              __LINE__);
+      abort();
+      break;
+    }
+  }
+
+/*
+ * Preconditions:
+ *  1.  input is not NULL
+ *  2.  type is numeric
+ *  3.  input conforms to type (will fit, allows sign, etc.)
+ *  4.  capacity set per PICTURE and USAGE, or
+ *      type == FldLiteralN and data.capacity == 0
+ *  5.  data_initial has been established with data.capacity() bytes, unless
+ *      FldLiteralN, in which case we will malloc data.initial.
+ *
+ * Process:
+ *  Convert input string to binary Host representation:
+ *    FldFloat: as tree
+ *    other (fixed point): as FIXED_WIDE_INT(128)
+ *  Set etc union via assignment, cbl_field_data_t::operator=().
+ *  That sets the correct member in the union and etc_type.
+ *
+ *  As of Mon Jan 5 13:32:32 2026, use gcc_assert for preconditions. A location
+ *  is provided so diagnositics can be issued.  We may remove precondition
+ *  verification from the caller and move error handling here.
+ *
+ *  Post condition:
+ *    etc union holds Host numeric value.
+ *    data.initial is NULL for error, else points to data.etc.
+ */
+
 void
+cbl_field_t::encode_numeric( const char input[], cbl_loc_t loc,
+                             const REAL_VALUE_TYPE& /*rvt*/ ) {
+  gcc_assert(input);
+  gcc_assert(is_numeric(this) || type == FldNumericEdited);
+
+  // The following are intended to test the preconditions....
+  if( type == FldLiteralN ) {
+    if( 0 < data.capacity() ) {
+      error_msg(loc, "unexpected nonzero numeric literal capacity");
+    }
+    if( data.initial != nullptr ) {
+      error_msg(loc, "unexpected initial value for numeric literal");
+    }
+  } else {
+    if( 0 == data.capacity() ) {
+      error_msg(loc, "unexpected zero capacity numeric nonliteral");
+    }
+  }
+
+  gcc_assert(0 < data.capacity() || type == FldLiteralN);
+  gcc_assert( data.initial == nullptr
+              || type == FldLiteralN
+              || data.capacity() <= strlen(data.initial)
+              || 1 < codeset.stride() );
+
+  if( type == FldFloat )
+    {
+    double d;
+    int n;
+    int erc = sscanf(input, "%lf%n", &d, &n);
+    if( erc < 0 || size_t(n) != strlen(input) )
+      {
+      dbgmsg("%s: error: could not interpret '%s' of '%s' as a number",
+             __func__, input + n, input);
+      gcc_assert(false);
+      }
+    REAL_VALUE_TYPE value;
+    real_from_string (&value, input);
+    value = real_value_truncate (TYPE_MODE (float128_type_node), value);
+    data = build_real(float128_type_node, value);
+    // Turn that back into a REAL_VALUE_TYPE with
+    // REAL_VALUE_TYPE readback_value = TREE_REAL_CST(data.etc.value);
+
+#define FOR_JIM 0
+#if FOR_JIM
+    {
+    // When you know data.etc.value was created with build_real()
+    enum tree_code code = TREE_CODE(TREE_TYPE(data.etc.value));
+    // code will be REAL_TYPE
+
+    REAL_VALUE_TYPE readback_value = TREE_REAL_CST(data.etc.value);
+    char ach[48];
+    size_t number_of_digits = 33;
+    bool crop_trailing_zeroes = true;
+    real_to_decimal(ach,
+                    &readback_value,
+                    sizeof(ach),
+                    number_of_digits,
+                    crop_trailing_zeroes);
+    fprintf(stderr, "FOR_JIM: %s real_value: %s\n", get_tree_code_name(code), ach);
+    }
+#endif
+
+    unsigned char *retval =
+                        static_cast<unsigned char *>(xmalloc(data.capacity()));
+    assert(retval);
+    switch( data.capacity() )
+      {
+      case 4:
+        value = real_value_truncate (TYPE_MODE (float32_type_node), value);
+        native_encode_real(SCALAR_FLOAT_TYPE_MODE (float32_type_node), &value,
+                            retval, 4, 0);
+        break;
+      case 8:
+        value = real_value_truncate (TYPE_MODE (float64_type_node), value);
+        native_encode_real(SCALAR_FLOAT_TYPE_MODE (float64_type_node), &value,
+                            retval, 8, 0);
+        break;
+      case 16:
+        // 'value' is already a truncated float128
+        native_encode_real(SCALAR_FLOAT_TYPE_MODE (float128_type_node), &value,
+                            retval, 16, 0);
+        break;
+      default:
+        gcc_assert(false);
+        break;
+      }
+    data.initial = reinterpret_cast<char *>(retval);
+    }
+  else
+    {
+    uint32_t l_capacity;
+    uint32_t l_digits;
+    int32_t  l_rdigits;
+    uint64_t l_attr;
+    // The following returned capacity is 1, 2, 4, 8, or 16, for the binary
+    // value.
+    FIXED_WIDE_INT(128)value = dirty_to_binary(input,
+                                               l_capacity,
+                                               l_digits,
+                                               l_rdigits,
+                                               l_attr);
+    data = wide_int_to_tree(intTI_type_node, value);
+    // turn that back into a FIXED_WIDE_INT with
+    // wi::tree_to_wide_ref iii = wi::to_wide( data.etc.value );
+
+#if FOR_JIM
+    {
+    // When you know data.etc.value was created with wide_int_to_tree.
+    enum tree_code code = TREE_CODE(TREE_TYPE(data.etc.value));
+    // code will be INTEGER_TYPE
+
+    wi::tree_to_wide_ref iii = wi::to_wide( data.etc.value );
+    char ach[60];
+    print_dec(iii, ach, SIGNED);
+    fprintf(stderr, "FOR_JIM: %s fixed_value: %s\n", get_tree_code_name(code), ach);
+    }
+#endif
+
+    if( data.capacity() == 0 )
+      {
+      // It falls to us to establish these parameters:
+      data.capacity(  l_capacity );
+      data.digits   = l_digits;
+      data.rdigits  = l_rdigits;
+      attr         |= l_attr;
+      data.initial = static_cast<char *>(xmalloc(data.capacity()));
+      gcc_assert(data.initial);
+      }
+    else if( !(attr & quoted_e) )
+      {
+      // quoted_e at this point means numeric edited, which gets the initial
+      // value verbatim.
+      if( l_attr & signable_e && !(attr & signable_e) && value < 0)
+        {
+          if( type != FldNumericEdited || (data.picture && data.picture[0] != '-')) {
+          error_msg(loc, "%qs has unsigned PICTURE but signed VALUE %qs",
+                    this->name, data.original());
+          }
+        }
+
+      if( data.digits && value != 0)
+        {
+        // We were supplied with parameters.  We now make sure they are
+        // consistent.
+        if( attr & scaled_e )
+          {
+          if( data.rdigits > 0 )
+            {
+            // This is like PIC PPP9999, with digits=4 and rdigits=3
+            if( l_digits != static_cast<uint32_t>(l_rdigits) )
+              {
+              error_msg(loc, "The magnitude is too large");
+              }
+            else
+              {
+              // This is like PIC PP999, with digits=3 and rdigits=2
+              if( data.digits + data.rdigits < l_digits )
+                {
+                error_msg(loc, "Too many significant digits");
+                }
+              else
+                {
+                // We know the abs(value) is less than 1, and we know that the
+                // fractional part fits into (data.digits + data.rdigits) to
+                // the right of the decimal point.  We need to make sure that
+                // the top rdigits of value are zero.
+
+                FIXED_WIDE_INT(128)tester = value;
+                if( tester < 0 )
+                  {
+                  tester = - tester;
+                  }
+                // The final value will have data.digits + l_rdigits decimal
+                // places.  Let's scale rvalue to that range, taking into
+                // account that we already have l_rdigits of those places.
+                tester *= 
+                     get_power_of_ten(data.digits + data.rdigits - l_rdigits);
+
+                // In the case of PPP9999, tester needs to be between 1 and
+                // 9999.  data.digits is 4, so....
+                if( tester >= get_power_of_ten(data.digits) )
+                  {
+                  error_msg(loc, "The fractional part is too large");
+                  }
+                }
+              }
+            }
+          else
+            {
+            // This is like PIC 999PP, with digits=3 and rdigits=-2
+            if( data.digits-data.rdigits < l_digits )
+              {
+              error_msg(loc, "Too many leading digits");
+              }
+            // We need to make sure the bottom -rdigits places are zero:
+            FIXED_WIDE_INT(128)v = value;
+            for(int32_t i=0; i < -data.rdigits; i++)
+              {
+              if( v % 10 != 0)
+                {
+                error_msg(loc, "P-scaled digits are nonzero");
+                break;
+                }
+              v = v / 10;
+              }
+            }
+          }
+        else if( !(attr & quoted_e) )
+          {
+          if( data.rdigits == 0 && l_rdigits > 0)
+            {
+            // This is a condition that the parser finds before we can:
+            }
+          if( data.rdigits && data.rdigits < l_rdigits )
+            {
+            // This is a condition that the parser finds before we can:
+            }
+          if( l_digits - l_rdigits > data.digits - data.rdigits )
+            {
+            error_msg(loc, "VALUE has too many integer digits");
+            }
+          }
+        }
+      }
+
+    char *retval;
+    if( data.initial )
+      {
+      retval = const_cast<char *>(data.initial);
+      }
+    else
+      {
+      retval = static_cast<char *>(xmalloc(data.capacity()));
+      data.initial = retval;
+      }
+
+    switch(type)
+      {
+      case FldNumericBin5:
+      case FldLiteralN:
+        {
+        binary_initial(retval, this, value, l_rdigits);
+        break;
+        }
+      case FldNumericBinary:
+        {
+        binary_initial(retval, this, value, l_rdigits);
+        if( attr & big_endian_e )
+          {
+          // This is a big-endian value, so swap retval end-for-end:
+          size_t left = 0;
+          size_t right = data.capacity() - 1;
+          while(left < right)
+            {
+            std::swap(retval[left++], retval[right--]);
+            }
+          }
+        break;
+        }
+      case FldPacked:
+        {
+        char *pretval = retval;
+        char ach[128];
+
+        bool negative;
+        if( value < 0 )
+          {
+          negative = true;
+          value = -value;
+          }
+        else
+          {
+          negative = false;
+          }
+
+        // For COMP-6 (flagged by packed_no_sign_e), the number of required
+        // digits is twice the capacity.
+
+        // For COMP-3, the number of digits is 2*capacity minus 1, because the
+        // the final "digit" is a sign nybble.
+
+        size_t ndigits =   (attr & packed_no_sign_e)
+                         ? data.capacity() * 2
+                         : data.capacity() * 2 - 1;
+
+        digits_from_int128(ach, this, ndigits, value, l_rdigits);
+
+        const char *digits = ach;
+        for(size_t i=0; i<ndigits; i++)
+          {
+          if( !(i & 0x01) )
+            {
+            *pretval    = ((*digits++) & 0x0F)<<4;;
+            }
+          else
+            {
+            *pretval++ += (*digits++) & 0x0F;
+            }
+          }
+        if( !(attr & packed_no_sign_e) )
+          {
+          // This is COMP-3, so put in a sign nybble
+          if( attr & signable_e )
+            {
+            if( negative )
+              {
+              *pretval++ += 0x0D;   // Means signable and negative
+              }
+            else
+              {
+              *pretval++ += 0x0C;   // Means signable and non-negative
+              }
+            }
+          else
+            {
+            *pretval++ += 0x0F;     // Means not signable
+            }
+          }
+        break;
+        }
+
+      case FldNumericDisplay:
+        {
+        // We are going to take the numerical value and convert it to the form
+        // specified by the attributes, digits, and rdigits.
+
+        char *pretval = retval;
+        char ach[128];
+
+        bool negative;
+        if( value < 0 )
+          {
+          negative = true;
+          value = - value;
+          }
+        else
+          {
+          negative = false;
+          }
+        digits_from_int128(ach, this, data.digits, value, l_rdigits);
+        const char *digits = ach;
+        if(    (attr & signable_e)
+            && (attr & separate_e)
+            && (attr & leading_e ) )
+          {
+          // This zoned decimal value is signable, separate, and leading.
+          if( negative )
+            {
+            *pretval++ = ascii_minus;
+            }
+          else
+            {
+            *pretval++ = ascii_plus;
+            }
+          }
+        for(size_t i=0; i<data.digits; i++)
+          {
+          // Start by assuming it's an value that can't be signed
+          *pretval++ = ascii_0 + ((*digits++) & 0x0F);
+          }
+        if(     (attr & signable_e)
+            &&  (attr & separate_e)
+            && !(attr & leading_e ) )
+          {
+          // The value is signable, separate, and trailing
+          if( negative )
+            {
+            *pretval++ = ascii_minus;
+            }
+          else
+            {
+            *pretval++ = ascii_plus;
+            }
+          }
+
+        // It's at this point we convert to the target encoding:
+        charmap_t *charmap = __gg__get_charmap(codeset.encoding);
+        size_t retval_length = pretval - retval;
+        if( retval_length != char_capacity() ) {
+          cbl_errx( "%s: %s %lu %s %lu",
+                    name,
+                    "retval_length",
+                    (unsigned long)retval_length,
+                    "!= char_capacity()",
+                    (unsigned long)char_capacity());
+        }
+        gcc_assert(retval_length == char_capacity());
+        size_t nbytes;
+        const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                                 codeset.encoding,
+                                                 retval,
+                                                 retval_length,
+                                                 &nbytes);
+        if( nbytes != data.capacity() ) {
+          cbl_errx( "%s: nbytes %lu %s %lu",
+                    name,
+                    (unsigned long)nbytes,
+                    "!= data.capacity()",
+                    (unsigned long)data.capacity());
+        }
+        gcc_assert(nbytes == data.capacity());
+        memcpy(retval, converted, data.capacity());
+        if(     (attr & signable_e)
+            && !(attr & separate_e) )
+          {
+          // This value is signable, and not separate.  So, the sign
+          // information goes into the first or last byte:
+          char *sign_location = attr & leading_e
+                        ? retval
+                        : retval + (data.digits-1) * charmap->stride() ;
+          cbl_char_t schar = charmap->set_digit_negative(*sign_location,
+                                                          negative);
+          memcpy(sign_location, &schar, charmap->stride());
+          }
+        break;
+        }
+
+      case FldNumericEdited:
+        {
+        if( attr & quoted_e )
+          {
+          // What the programmer says the value is, the value stays, no
+          // matter how weird it might be.
+          }
+        else
+          {
+          // It's not a quoted string, so we use data.value:
+          bool negative;
+          if( value < 0 )
+            {
+            negative = true;
+            value = -value;
+            }
+          else
+            {
+            negative = false;
+            }
+
+          char ach[128];
+          memset(ach, 0, sizeof(ach));
+          memset(retval, 0, data.capacity());
+
+          if( (attr & blank_zero_e) && value == 0 )
+            {
+            memset( retval,
+                    ascii_space,
+                    data.capacity());
+            }
+          else
+            {
+            digits_from_int128(ach, this, char_capacity(), value, l_rdigits);
+
+            // __gg__string_to_numeric_edited operates in ASCII space:
+            __gg__string_to_numeric_edited( reinterpret_cast<char *>(retval),
+                                            ach,
+                                            data.rdigits,
+                                            negative,
+                                            data.picture);
+            // So now we convert it to the target encoding:
+            size_t nbytes;
+            const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                                     codeset.encoding,
+                                                     retval,
+                                                     char_capacity(),
+                                                     &nbytes);
+            memcpy(retval, converted, nbytes);
+            }
+          }
+        break;
+        }
+
+      default:
+        cbl_errx( "%s:%d: type %s, who woulda thunk?",
+                  __func__, __LINE__, cbl_field_type_str(type) );
+        gcc_assert(false);
+        break;
+      }
+    }
+  gcc_assert(data.etc_type != cbl_field_data_t::no_value_e);
+}
+
+size_t parse_error_inc();
+size_t parse_error_count();
+
+bool // true if error reported
 cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
 
-  if( ! data.initial ) return;
+  if( ! data.original() ) return false;
 
-  auto fig = cbl_figconst_of(data.initial);
+  const auto nerr = parse_error_count();
 
-  // numeric initial value
+  auto orig = data.original();
+
+  auto fig = cbl_figconst_of(orig);
+
+  // numeric orig value
   if( is_numeric(type) ) {
     if( has_attr(quoted_e) ) {
       error_msg(loc, "numeric type %s VALUE '%s' requires numeric VALUE",
-               name, data.initial);
-      return;
+               name, orig);
+      return true;
     }
     if( ! (fig == normal_value_e || fig == zero_value_e)  ) {
         error_msg(loc, "numeric type %s VALUE '%s' requires numeric VALUE",
                  name, cbl_figconst_str(fig));
-        return;
+        return true;
       }
 
     switch( type ) {
@@ -1038,18 +1882,18 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
         // We are dealing with a pure binary type.  If the capacity is
         // 8 or more, we need do no further testing because we assume
         // everything fits.
-        if( data.capacity < 8 ) {
-          const char *p = strchr(data.initial, symbol_decimal_point());
+        if( data.capacity() < 8 ) {
+          const char *p = strchr(orig, symbol_decimal_point());
           if( p && atoll(p+1) != 0 ) {
             error_msg(loc, "integer type %s VALUE '%s' "
                      "requires integer VALUE",
-                     name, data.initial);
+                     name, orig);
           } else {
             // Calculate the maximum possible value that a binary with this
             // many bytes can hold
             size_t max_possible_value;
             max_possible_value = 1;
-            max_possible_value <<= data.capacity*8;
+            max_possible_value <<= data.capacity()*8;
             max_possible_value -= 1;
             if( attr & signable_e )
               {
@@ -1059,22 +1903,22 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
               }
             // Pick up the given VALUE
             size_t candidate;
-            if( *data.initial == '-' ) {
+            if( *orig == '-' ) {
               // We care about the magnitude, not the sign
               if( !(attr & signable_e) ){
                 error_msg(loc, "integer type %s VALUE '%s' "
                          "requires a non-negative integer",
-                         name, data.initial);
+                         name, orig);
               }
-              candidate = atoll(data.initial+1);
+              candidate = atoll(orig+1);
             }
             else {
-              candidate = (size_t)atoll(data.initial);
+              candidate = (size_t)atoll(orig);
             }
             if( candidate > max_possible_value ) {
               error_msg(loc, "integer type %s VALUE '%s' "
                        "requires an integer of magnitude no greater than %zu",
-                       name, data.initial, max_possible_value);
+                       name, orig, max_possible_value);
             }
           }
         }
@@ -1087,7 +1931,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
         /*
          * Check fraction for excess precision
          */
-        const char *p = strchr(data.initial, symbol_decimal_point());
+        const char *p = strchr(orig, symbol_decimal_point());
         if( p ) {
           auto pend = std::find(p, p + strlen(p), 0x20);
           int n = std::count_if( ++p, pend, isdigit );
@@ -1095,7 +1939,7 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
           if( data.precision() < n) {
             if( 0 == data.rdigits ) {
               error_msg(loc, "integer type %s VALUE '%s' requires integer VALUE",
-                       name, data.initial);
+                       name, orig);
             } else {
               auto has_exponent = std::any_of( p, pend,
                                                []( char ch ) {
@@ -1103,55 +1947,88 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
                                                } );
               if( !has_exponent && data.precision() < pend - p ) {
                 error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%ld)",
-                          name, data.initial, '.', (long)(pend - p));
+                          name, orig, '.', (long)(pend - p));
               }
             }
           }
         } else {
-          p = data.initial + strlen(data.initial);
+          p = orig + strlen(orig);
         }
 
         /*
          * Check magnitude, whether or not there's a decimal point.
          */
         // skip leading zeros
-        auto first_digit = std::find_if( data.initial, p,
+        auto first_digit = std::find_if( orig, p,
                                          []( char ch ) {
                                            return ch != '0'; } );
         // count remaining digits, up to the decimal point
         auto n = std::count_if( first_digit, p, isdigit );
         if( data.ldigits() < n ) {
           error_msg(loc, "numeric %s VALUE '%s' holds only %u digits",
-                   name, data.initial,
+                   name, orig,
                    data.digits);
         }
       }
       break;
-    } // end type switch for normal string initial value
-    return;
+    } // end type switch for normal string orig value
+    return nerr < parse_error_count();
   } // end numeric
   assert( ! is_numeric(type) );
 
   // consider all-alphabetic
   if( has_attr(all_alpha_e) ) {
-    bool alpha_value = fig != zero_value_e;
+    bool is_alpha_only = fig != zero_value_e;
+
+    if( fig == normal_value_e && ! has_attr(hex_encoded_e)) {
+      // Test the input, not the converted initial value
+      is_alpha_only = std::none_of( orig, orig + strlen(orig),
+                                    []( char ch ) {
+                                      return
+                                        ISPUNCT(ch) ||
+                                        ISDIGIT(ch); } );
+    }
+    /*
+     * This is overspecific: It catches numeric literal VALUE for all_alpha_e\
+     *  only.
+     * The general error is: 
+     * - alphanumeric type
+     * - data.initial is all spaces (based on PICTURE)
+     * - data.original() is numeric or data.etc_type == value_e
+     * - quoted_e clear, of course
+     * 
+     * This happens because VALUE was captured as a cce and stored in
+     * data.original for encode_numeric.  But encode_numeric was never called
+     * because it's not a numeric field.
+     *
+     * It is also insufficient.  It does not deal with VALUE LENGTH OF.  
+     */
+    if( is_alpha_only ) {
+      charmap_t *charmap = __gg__get_charmap(codeset.encoding);
+      auto spc = charmap->mapped_character(ascii_space);
+      bool spacey = std::all_of( data.initial,
+                                 data.initial + char_capacity(),
+             [spc]( char ch ) { return static_cast<cbl_char_t>(ch) == spc; } );
+      if( spacey ) {
+        if( ISDIGIT(orig[0]) || orig[0] == '-' || orig[0] == '+' ) {
+          gcc_assert( ! has_attr(quoted_e) );
+          is_alpha_only = false; // alpha field supplied with VALUE numeric
+        }
+      }
+    }
     
-    if( fig == normal_value_e ) {
-      alpha_value = std::none_of( data.initial,
-                                  data.initial +
-                                  data.capacity,
-                                  []( char ch ) {
-                                    return 
-                                      ISPUNCT(ch) ||
-                                      ISDIGIT(ch); } );
-    }
-    if( ! alpha_value ) {
+    if( ! is_alpha_only ) {
       error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data",
-               name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial);
+               name, fig == zero_value_e? cbl_figconst_str(fig) : orig);
+      
+      auto pend = orig + strlen(orig);
+      auto p = std::find_if( orig, pend, 
+                             []( char ch ) { return ! ISALPHA(ch); } );
+      dbgmsg("%zu nonalpha '%.*s'", pend - p, int(pend - p), p);
     }
   }
 
-  return;
+  return nerr < parse_error_count();
 }
 
 // Return the field representing the subscript whose literal value
@@ -1344,15 +2221,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
     case 0:
       if( src->type == FldLiteralA && is_numericish(tgt) && !is_literal(tgt) ) {
         // Allow if input string is an integer.
-        size_t outcount;
-        char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity));
-        const char *in_asciip = __gg__iconverter( src->codeset.encoding,
-                                                  DEFAULT_SOURCE_ENCODING,
-                                                  src->data.initial,
-                                                  src->data.capacity,
-                                                  &outcount );
-        memcpy(in_ascii, in_asciip, outcount);
-        const char *p = in_ascii, *pend = p + src->data.capacity;
+        const char *p = src->data.original(), *pend = p + strlen(src->data.original());
         if( (p[0] == ascii_plus) || (p[0] == ascii_minus) ) p++;
         retval = std::all_of( p, pend, isdigit );
         if( yydebug && ! retval ) {
@@ -1362,7 +2231,6 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
                  HOST_SIZE_T_PRINT_UNSIGNED,
                  __func__, __LINE__, *bad, (fmt_size_t)(bad - p));
         }
-      free(in_ascii);
       }
       break;
     case 1:
@@ -1388,7 +2256,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
     }
 
   if( retval && src->has_attr(embiggened_e) ) {
-    if( is_numeric(tgt) && tgt->data.capacity < src->data.capacity ) {
+    if( is_numeric(tgt) && tgt->data.capacity() < src->data.capacity() ) {
       dbgmsg("error: source no longer fits in target");
       return false;
     }
@@ -1484,7 +2352,8 @@ type_capacity( enum cbl_field_type_t type, uint32_t digits )
 
     auto psize = std::find_if( sizes, esizes,
                          [digits]( sizes_t sizes ) {
-                           return sizes.bounds.first <= digits && digits <= sizes.bounds.second;
+                           return sizes.bounds.first <= digits
+                                                      && digits <= sizes.bounds.second;
                          } );
     if( psize != esizes ) return psize->size;
 
@@ -2129,16 +2998,16 @@ cobol_filename_restore() {
   linemap_add(line_table, LC_LEAVE, sysp, NULL, 0);
 }
 
-size_t
+uint64_t
 symbol_unique_index( const struct symbol_elem_t *e ) {
   assert(e);
-  size_t usym = symbol_index(e);
-#if READY_FOR_INODE
+  uint64_t usym = symbol_index(e);
   if( ! input_filenames.empty() ) {
-    size_t inode = input_filenames.top().inode;
-    usym = usym ^ inode;
+    uint64_t inode = input_filenames.top().inode;
+    static const int half_bits = sizeof(uint64_t)*4;
+    usym ^= inode>>half_bits;
+    usym ^= inode<<half_bits;
   }
-#endif
   return usym;
 }
 
@@ -2227,7 +3096,6 @@ verify_format( const char gmsgid[] ) {
 #endif
 
 static const diagnostics::option_id option_zero;
-size_t parse_error_inc();
 
 void gcc_location_dump() {
     linemap_dump_location( line_table, token_location, stderr );
@@ -2567,7 +3435,7 @@ cbl_unimplementedw(cbl_diag_id_t id, const char *gmsgid, ...) {
     msg = xasprintf("%s [%s]", gmsgid, option);
     gmsgid = msg;
   }
+
   va_list ap;
 
   va_start(ap, gmsgid);
index f6f6bbcbf01b33792388c4587f7f7f3af84d21c8..082e6fb0fc699e50df4dcc7bbbc36cc8db595aea 100644 (file)
@@ -1,5 +1,5 @@
        *> { dg-do run }
-       *> { dg-options "-finternal-ebcdic" }
+       *> { dg-options "-fexec-charset=cp1140" }
        *> { dg-output-file "group2/CHAR_and_ORD_with_COLLATING_sequence_-_EBCDIC.out" }
         IDENTIFICATION      DIVISION.
         PROGRAM-ID.         prog.
index 6aa938800205128a94ce4f2f1d38f81d59345070..508e2590ac10559a51cae22d5d7bb5f0f363cfd7 100644 (file)
@@ -6,10 +6,10 @@
         DATA DIVISION.
         WORKING-STORAGE SECTION.
         01 FILLER.
-          02 ADATA VALUE "654321".
+          02 ADATA PIC X(6) VALUE "654321".
           02 A REDEFINES ADATA PIC 9 OCCURS 6 TIMES.
           02 B PIC 9.
-          02 CDATA VALUE "999999".
+          02 CDATA PIC X(6) VALUE "999999".
           02 C REDEFINES CDATA PIC 9 OCCURS 6 TIMES.
         01 TEMP PIC 9.
         PROCEDURE DIVISION.
index e0bf4c9baa9e6bc0e36ff39d7ae5e340c267fb70..73c545a961f2d9f547ee762f0d26cf8764fd5d82 100644 (file)
@@ -1,8 +1,7 @@
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-
+555.10 555.10 555.10 555.10 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.10 555.10 555.10 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.10 555.10 555.09 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.10 555.09 555.09 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.09 555.09 555.10 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.09 555.09 555.10 555.10 555.0999756 555.099999999999909 555.1000030517578124999999999999999606
+555.09 555.10 555.10 555.10 555.0999756 555.100003051757767 555.0999999999999971578290569595992171
\ No newline at end of file
index 6417d0193821a082c67493a800dc16c47e3b32a9..6500a6e58fd081c2547616bba1d579358ead90b8 100644 (file)
@@ -1,9 +1,8 @@
 -555
 -555.55
--5.5555E+208
+-555.55e206
 555
 555.55
-5.5555E+208
+555.55e206
 333.3300 333.3300 333.3300 333.3300 3.333300083E+22 3.33329999999999994E+102 3.333300000000000000000000000000000168E+202
-555.5500 555.5500 555.5500 555.5500 5.555499988E+22 5.55549999999999973E+102 5.555500000000000000000000000000000029E+202
-
+555.5500 555.5500 555.5500 555.5500 5.555499988E+22 5.55549999999999973E+102 5.555500000000000000000000000000000029E+202
\ No newline at end of file
index d5220aecf397db812a8a93cdd921d135537ba213..001e29142bee6d5fa6c11c4572ee98162faab61a 100644 (file)
@@ -40,6 +40,7 @@
 #include <algorithm>
 #include <unordered_map>
 #include <vector>
+#include <langinfo.h>
 
 #include "ec.h"
 #include "common-defs.h"
@@ -55,10 +56,14 @@ int __gg__decimal_separator    = ','  ;
 int __gg__quote_character      = '"'  ;
 int __gg__low_value_character  = 0x00 ;
 int __gg__high_value_character = 0xFF ;
+cbl_char_t __gg__working_init  = NOT_A_CHARACTER;
+cbl_char_t __gg__local_init    = NOT_A_CHARACTER;
+uint32_t   __gg__wsclear       = NOT_A_CHARACTER;
 std::vector<std::string> __gg__currency_signs(256) ;
 int __gg__default_currency_sign;
 char *__gg__ct_currency_signs[256];  // Compile-time currency signs
 
+cbl_encoding_t __gg__console_encoding  = no_encoding_e    ;
 cbl_encoding_t __gg__display_encoding  = no_encoding_e;
 cbl_encoding_t __gg__national_encoding = no_encoding_e;
 
@@ -1412,6 +1417,29 @@ static encodings_t encodings[] = {
   { false, iconv_YU_e, "YU" },
 };
 
+/*
+ * Because this variable is static, the contructor runs before main and is
+ * guaranted to run.
+ */
+static class rt_encoding_t
+  {
+  const char *ctype, *lc_ctype;
+  public:
+  rt_encoding_t() : ctype( setlocale(LC_CTYPE, "") )
+    {
+    lc_ctype =  nl_langinfo(CODESET);
+    // Let's learn what the computer is using for the console:
+    // We need to establish the codeset used by the system console:
+  __gg__console_encoding = use_locale();
+    }
+  cbl_encoding_t use_locale() const
+    {
+    auto encoding = strstr(ctype, "UTF-8") ?
+      iconv_UTF_8_e : __gg__encoding_iconv_type(lc_ctype);
+    return encoding;
+    }
+  } rt_encoding;
+
 static const encodings_t *
 encoding_descr( cbl_encoding_t encoding ) {
   static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
@@ -1423,20 +1451,8 @@ encoding_descr( cbl_encoding_t encoding ) {
   return p < eoencodings? p : nullptr;
 }
 
-const char *
-__gg__encoding_iconv_name( cbl_encoding_t encoding ) {
-  auto p = encoding_descr(encoding);
-  return p? p->name : nullptr;
-}
-
-bool
-__gg__encoding_iconv_valid( cbl_encoding_t encoding ) {
-  auto p = encoding_descr(encoding);
-  return p? p->supported : false;
-}
-
-cbl_encoding_t
-__gg__encoding_iconv_type( const char *name ) {
+static const encodings_t *
+encoding_descr( const char name[] ) {
   static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
 
   char *slashless = strdup(name);
@@ -1453,40 +1469,92 @@ __gg__encoding_iconv_type( const char *name ) {
                          } );
   free(slashless);
 
-  return p < eoencodings? p->type : no_encoding_e;
+  return p < eoencodings? p : nullptr;
+}
+
+const encodings_t *
+__gg__encoding_iconv_descr( const char name[] ) {
+  return encoding_descr(name);
+}
+
+const encodings_t *
+__gg__encoding_iconv_descr( cbl_encoding_t encoding ) {
+  return encoding_descr(encoding);
+}
+
+const char *
+__gg__encoding_iconv_name( cbl_encoding_t encoding ) {
+  auto p = encoding_descr(encoding);
+  return p? p->name : nullptr;
+}
+
+bool
+__gg__encoding_iconv_valid( cbl_encoding_t encoding ) {
+  auto p = encoding_descr(encoding);
+  return p? p->supported : false;
+}
+
+cbl_encoding_t
+__gg__encoding_iconv_type( const char *name ) {
+  auto p = encoding_descr(name);
+  return p? p->type : no_encoding_e;
 }
 
 char *
 __gg__iconverter( cbl_encoding_t from,
                   cbl_encoding_t to,
-            const char *str,
+            const void *str_,
                   size_t length,
-                  size_t *outlength)
+                  size_t *outlength_p,
+                  size_t *iconv_retval_p )
   {
+  const char *str = static_cast<const char *>(str_);
+
+  // Attempts to convert 'length' bytes 'str' in 'from' encoding to
+  // the 'to' encoding.
+
+  // The return value points to a static memory area in this function, the
+  // caller has to respect that and make copies before doing something that
+  // will call this routine again.  Note that __gg__get_charmap, and
+  // charmap_t::mapped_character can call this routine.
+
+  // The routine optionally returns the number of bytes generated, the number
+  // of bytes eaten by iconv, and the actual return value from the iconv call.
+
+  // Let's consider the possibility of each input character needing four output
+  // characters.  We increase it by one to leave room for the terminating NUL,
+  // which itself might be four bytes of 0x00. The static area keeps growing
+  // as necessary.
+
+  // Get charmap first, because we might need it in the event of a conversion
+  // error, and we have to avoid problems with recursion clobbering the return
+  // buffer, because __gg__get_charmap can call us:
+  charmap_t *charmap_to = __gg__get_charmap(to);
+
   static size_t retsize = 1;
   static char *retval = static_cast<char *>(malloc(retsize));
 
-  // Let's consider the possibility of each input character needed four output
-  // characters:
-  size_t needed = 4*length;
+  size_t needed = 4*(length+1);
   if( retsize < needed )
     {
     retsize = needed;
     retval = static_cast<char *>(realloc(retval, retsize));
     }
 
+  size_t outlength;
+  size_t iconv_retval;
+
   if( from == to )
     {
+    // There is no need to actually convert.  Simulate a successful iconv()
+    // call:
+
     memcpy(retval, str, length);
-    *outlength = length;
+    outlength = length;
+    iconv_retval = 0;
     }
   else
     {
-    // Converts the given string from from to to using iconv.
-
-    // The return value points to a static memory area in this function, the
-    // caller has to respect that.
-
     // We attempt to minimize overhead by using a map to call
     // iconv_open but once for each from/to pairing.
 
@@ -1514,40 +1582,150 @@ __gg__iconverter( cbl_encoding_t from,
 
     char *inbuf  = const_cast<char *>(str);
     char *outbuf = retval;
-    size_t incount = length;
-    size_t outcount = retsize;
-    *outlength = iconv( cd,
-                        &inbuf, &incount,
-                        &outbuf, &outcount);
-    *outlength = retsize - outcount;
-
-    if( *outlength == length )
+    size_t inbytesleft  = length;
+    size_t outbytesleft = retsize;
+
+    /* It's time for some COBOL magic.  The default HIGH-VALUE in COBOL is
+       0xFF.  CP1252, UTF-16, and UTF32 all happily interconvert 0xFF, 0x00FF,
+       and 0x000000FF.  But CP1140 is a pain.
+
+       A CP1252 0xFF becomes a CP1140 DF, which converts back to 0xFF
+       CP1140 DF becomes FF, 00ff and 000000FF.
+
+       So, we need to intervene when the source, or dest, is ebcdic.  */
+
+    char *inbuf_cpy = nullptr;
+    if( __gg__high_value_character == DEGENERATE_HIGH_VALUE )
+      {
+      const charmap_t *map_from = __gg__get_charmap(from);
+      if( map_from->is_like_ebcdic() )
+        {
+        inbuf_cpy = static_cast<char *>(malloc(length));
+        assert(inbuf_cpy);
+        memcpy(inbuf_cpy, inbuf, length);
+        inbuf = inbuf_cpy;
+        for(size_t i=0; i<length; i++)
+          {
+          if( (unsigned char)inbuf[i] == (unsigned char)0xFF )
+            {
+            inbuf[i] = (char)0xDF;
+            }
+          }
+        }
+      }
+
+    // When the caller supplies iconv_retval_p, we only try to convert once,
+    // because they are telling us they will handle errors.
+
+    // Otherwise, we just keep trying to convert, replacing unconvertable
+    // characters with a replacement.
+
+    iconv_retval = 1; // This primes the pump:
+    for(;;)
       {
-      /*  In a kind of shortsighted way, we are going to assume
-          single-byte-coding, and we are going to cope here with the
-          COBOL-world reality of HIGH-VALUE being, by default, the value 0xFF.
-          This is required by IBM in the EBCDIC and ASCII worlds.  The
-          implications for other locales are being left for another time.
-
-          So, for now, we are regarding 0xFF as invariant.  Thus, at this
-          point, we have to scan the input and make sure the output has 0xFF
-          where the input does.  */
-      for(size_t i=0; i<length; i++)
+      iconv_retval = iconv( cd,
+                            &inbuf, &inbytesleft,
+                            &outbuf, &outbytesleft);
+      if( iconv_retval_p || iconv_retval == 0 )
         {
-        if( static_cast<unsigned char>(str[i]) == 0xFF )
+        // Either there was no conversion error, or else our caller wants
+        // to know about the error
+        break;
+        }
+      // Arriving here means that there has been a conversion error.
+      if( charmap_to->stride() >= 2 )
+        {
+        // Put in the value for the U+FFFD Replacement Character
+        charmap_to->putch(REPLACEMENT_CHARACTER, outbuf, size_t(0));
+        outbuf += charmap_to->stride();
+        outbytesleft -= charmap_to->stride();
+        }
+      else if( charmap_to->is_like_utf8() )
+        {
+        // Put in the UTF-8 bytes for the U+FFFD Replacement Character
+        *outbuf++ = static_cast<char>(0xEF);
+        *outbuf++ = static_cast<char>(0xBF);
+        *outbuf++ = static_cast<char>(0xBD);
+        outbytesleft -= 3;
+        }
+      else
+        {
+        // This is some kind of single-byte-coded character set.  We just use
+        // a question mark as the replacement character.
+        *outbuf++ = charmap_to->mapped_character(ascii_query);
+        outbytesleft -= 1;
+        }
+      // skip past the byte that caused the conversion error:
+      inbuf += 1;
+      inbytesleft -= 1;
+      // Raise the run-time error:
+#ifdef IN_TARGET_LIBS
+      exception_raise(ec_data_conversion_e);
+      // And then loop around and try it again.
+#endif
+      }
+
+    free(inbuf_cpy);
+    // Calculate the number of bytes generated:
+    outlength = retsize - outbytesleft;
+
+    if( __gg__high_value_character == DEGENERATE_HIGH_VALUE )
+      {
+      const charmap_t *map_to = __gg__get_charmap(to);
+      if( map_to->is_like_ebcdic() )
+        {
+        for(size_t i=0; i<length; i++)
           {
-          retval[i] = static_cast<char>(0xFF);
+          if( (unsigned char)retval[i] == (unsigned char)0xDF )
+            {
+            retval[i] = (char)0xFF;
+            }
           }
         }
       }
     }
   // For the convenience of those who call this routine, we are sticking a
-  // terminating NUL on the end of the generated string
-  retval[*outlength] = '\0';
+  // terminating NUL on the end of the generated string.  Keeping in mind that
+  // a NUL isn't always a single byte, we are going to lay down four of them.
+  retval[outlength+0] = '\0';
+  retval[outlength+1] = '\0';
+  retval[outlength+2] = '\0';
+  retval[outlength+3] = '\0';
+
+  if( outlength_p )
+    {
+    *outlength_p = outlength;
+    }
+  if( iconv_retval_p )
+    {
+    *iconv_retval_p = iconv_retval;
+    }
 
   return retval;
   }
 
+char *
+__gg__miconverter( cbl_encoding_t from,
+                   cbl_encoding_t to,
+             const void *str_,
+                   size_t length,
+                   size_t *outlength_p,
+                   size_t *iconv_retval_p )
+  {
+  const char *converted = __gg__iconverter(from,
+                                           to,
+                                           str_,
+                                           length,
+                                           outlength_p,
+                                           iconv_retval_p);
+  char *retval = static_cast<char *>(malloc(*outlength_p + 4));
+  assert(retval);
+  memcpy(retval, converted, *outlength_p);
+  // Tack on four zeros to be a NUL in any encoding.
+  memset(retval + *outlength_p, 0, 4);
+  return retval;
+  }
+
 static
 std::unordered_map<cbl_encoding_t, charmap_t *>map_of_encodings;
 
index 60068a7fb711fd5062494951658b78385e6ae383..44e327227b6dcd6767fe79cb22b13429c46e59af 100644 (file)
@@ -35,6 +35,8 @@
 #include <vector>
 
 #include <unistd.h>
+#include <limits.h>
+#include <iconv.h>
 
 /*  There are four distinct codeset domains in the COBOL compiler.
  *
 
     Stay alert!    */
 
+typedef uint32_t cbl_char_t;
+#define NOT_A_CHARACTER (0xbadbeef)
+
 extern int    __gg__decimal_point        ;
 extern int    __gg__decimal_separator    ;
 extern int    __gg__quote_character      ;
@@ -115,11 +120,64 @@ extern std::vector<std::string> __gg__currency_signs       ;
 extern int    __gg__default_currency_sign;
 extern cbl_encoding_t __gg__display_encoding ;
 extern cbl_encoding_t __gg__national_encoding ;
+extern cbl_char_t __gg__working_init;
+extern cbl_char_t __gg__local_init;
+extern uint32_t __gg__wsclear;
+
+enum
+  {
+  /* HIGH-VALUE is an endless source of irritation.
+  
+     0xFF is the default value for COBOL since time immemorial.  Its use that
+     way long predates the existence of code pages.  0xFF is a valid character
+     in many code pages, which make a muddle of the original intent of a
+     default value of 0xFF for high-value.
+
+     We want older programs to continue to work.  And we want to use 0xFF for
+     ascii and ebcdic, and it turns out that 0xFFFF works for UTF-16; it is
+     specifically designed in UNICODE as a well-formed non-character.
+     
+     0xFFFFFFFF, however, is not readily usable in UTF-32.  It is not well-
+     formed, and it is not a character.  Technically, the largest value in
+     UTF-32 is the largest UNICODE code point, which is 0x10FFFF.  It's
+     tempting to use that value as the UTF32 HIGH-VALUE, except that it doesn't
+     map into a single 16-bit value in UTF-16 (it takes a pair of 16-bit
+     values), and it doesn't map into anything sensible in ASCII or EBCDIC, and
+     it takes multiple bytes in UTF-8.
+     
+     So, we are going to work with the following observations:
+     
+     0xFF   in CP1252 <==> 0x000000FF in UTF32
+     0xFF   in CP1140 <==> 0x0000009F in UTF32
+     0xFFFF in UTF-16 <==> 0x0000FFFF in UTF32
+
+     Be it hereby acknowledged that not all possibilities for encoding inter-
+     conversion have been explored, and we anticipate finding and eliminating
+     HIGH-VALUE problems will be Whac-A-Mole territory for some time to come.
+     
+     Please use these constants for that kind of work, because otherwise
+     finding anomalies will be even more frustrating than I currently
+     anticipate.  Dubner, 2025-11-24  */
+  DEFAULT_HIGH_VALUE_8  =       0xFF,
+  DEFAULT_HIGH_VALUE_16 =     0x00FF,
+  DEFAULT_HIGH_VALUE_32 = 0x000000FF,
+  
+  /* These values are used as figurative constants when interconverting from
+     and encoding to UTF32.  Examine, for example, the implementation for
+     the INSPECT statement: */
+  ASCII_HIGH_VALUE_32   = 0x000000FF,
+  EBCDIC_HIGH_VALUE_32  = 0x000000FF,
+  UTF16_HIGH_VALUE_32   = 0x000000FF,
+  UTF32_HIGH_VALUE_32   = 0x000000FF,
+
+  REPLACEMENT_CHARACTER = 0xFFFD,
+  };
 
 #define NULLCH ('\0')
 #define DEGENERATE_HIGH_VALUE 0xFF
 #define DEGENERATE_LOW_VALUE 0x00
 
+#define ascii_nul              ((uint8_t)('\0'))
 #define ascii_A                ((uint8_t)('A'))
 #define ascii_B                ((uint8_t)('B'))
 #define ascii_C                ((uint8_t)('C'))
@@ -189,6 +247,7 @@ extern cbl_encoding_t __gg__national_encoding ;
 #define ascii_colon            ((uint8_t)(':'))
 #define ascii_comma            ((uint8_t)(','))
 #define ascii_dollar_sign      ((uint8_t)('$'))
+#define ascii_bang             ((uint8_t)('!'))
 #define ascii_dquote           ((uint8_t)('"'))
 #define ascii_oparen           ((uint8_t)('('))
 #define ascii_caret            ((uint8_t)('^'))
@@ -223,14 +282,31 @@ extern const unsigned short __gg__ebcdic_to_cp1252_collation[256];
 
 const char * __gg__encoding_iconv_name( cbl_encoding_t encoding );
 cbl_encoding_t __gg__encoding_iconv_type( const char *name );
+extern cbl_encoding_t __gg__console_encoding;
 
+// returns a pointer to a static buffer.  Beware!
 char * __gg__iconverter(cbl_encoding_t from,
                         cbl_encoding_t to,
-                  const char *str,
+                  const void *str,
                         size_t length,
-                        size_t *outlength);
+                        size_t *outlength = nullptr,     // Bytes produced
+                        size_t *iconv_retval = nullptr);
+
+// returns a malloced buffer.  Remember to free it.
+char * __gg__miconverter(cbl_encoding_t from,
+                         cbl_encoding_t to,
+                   const void *str,
+                         size_t length,
+                         size_t *outlength = nullptr,     // Bytes produced
+                         size_t *iconv_retval = nullptr);
+
 
 #define DEFAULT_SOURCE_ENCODING (iconv_CP1252_e)
+#define DEFAULT_32_ENCODING (iconv_UTF32LE_e)
+
+class charmap_t;
+
+charmap_t *__gg__get_charmap(cbl_encoding_t encoding);
 
 class charmap_t
   {
@@ -240,7 +316,8 @@ class charmap_t
     bool m_is_valid;
     bool m_is_big_endian;
     bool m_has_bom = false;
-    int  m_stride; // Number of bytes between one character and the next
+    bool m_is_like_utf8;
+    uint8_t  m_stride; // Number of bytes between one character and the next
 
     enum
       {
@@ -250,7 +327,7 @@ class charmap_t
 
     // This map retains the ASCII-to-encoded value in m_encoding, so that iconv
     // need be called but once for each ASCII value.
-    std::unordered_map<int, int>m_map_of_encodings;
+    std::unordered_map<cbl_char_t, cbl_char_t>m_map_of_encodings;
 
   public:
     explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
@@ -263,18 +340,31 @@ class charmap_t
       // what we get back.
       
       size_t outlength = 0;
-      const char challenge[] = "0";
-      const unsigned char *response = PTRCAST(unsigned char,
-                                   __gg__iconverter(DEFAULT_SOURCE_ENCODING,
-                                                    m_encoding,
-                                                    challenge,
-                                                    1,
-                                                    &outlength));
+      char challenge[] = "0";
+      char response_[8];
+
+      iconv_t cd = iconv_open(
+                          __gg__encoding_iconv_name(m_encoding),
+                          __gg__encoding_iconv_name(DEFAULT_SOURCE_ENCODING));
+      char *inbuf  = challenge;
+      char *outbuf = response_;
+      size_t inbytesleft = 1;
+      size_t outbytesleft = sizeof(response_);
+      /*size_t nret = */ iconv( cd,
+                            &inbuf,  &inbytesleft,
+                            &outbuf, &outbytesleft);
+      outlength = sizeof(response_) - outbytesleft;
+      iconv_close(cd);
+      
+      const unsigned char *response = 
+                                  reinterpret_cast<unsigned char *>(response_);
+      
       unsigned char char_0 = 0x00;
 
       m_is_valid = false;
       m_has_bom  = false;
       m_is_big_endian = false;
+      m_is_like_utf8 = false;
 
       if( outlength == 1 )
         {
@@ -349,20 +439,37 @@ class charmap_t
         m_is_valid = true;
         m_numeric_sign_type = sign_type_ebcdic;
         }
+
+      // Let's see if this encoding is UTF-8.  We will do that by converting
+      // the single-byte CP1252 code for the Euro symbol to our encoding.
+      cd = iconv_open(
+                    __gg__encoding_iconv_name(iconv_CP1252_e),
+                    __gg__encoding_iconv_name(m_encoding));
+      challenge[0] = static_cast<char>(0x80);// This is the CP1252 Euro symbol.
+      inbuf  = challenge;
+      outbuf = response_;
+      inbytesleft = 1;
+      outbytesleft = sizeof(response_);
+      iconv(cd,
+            &inbuf,  &inbytesleft,
+            &outbuf, &outbytesleft);
+      outlength = sizeof(response_) - outbytesleft;
+      iconv_close(cd);
+      m_is_like_utf8 = (outlength == 3);
       }
 
-    bool is_valid()      const{return m_is_valid     ;}
-    bool is_big_endian() const{return m_is_big_endian;}
-    bool has_bom()       const{return m_has_bom      ;}
-    int  stride()        const{return m_stride       ;}
+    bool is_valid()      const { return m_is_valid     ; }
+    bool is_big_endian() const { return m_is_big_endian; }
+    bool has_bom()       const { return m_has_bom      ; }
+    uint8_t stride()     const { return m_stride       ; }
 
-    int mapped_character(int ch)
+    cbl_char_t mapped_character(cbl_char_t ch) 
       {
       // The assumption is that anybody calling this routine is providing
       // a single-byte character in the DEFAULT_SOURCE_ENCODING encoding.  We
       // return the equivalent character in the m_encoding
-      int retval;
-      std::unordered_map<int, int>::const_iterator it =
+      cbl_char_t retval;
+      std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator it =
                                                    m_map_of_encodings.find(ch);
       if( it != m_map_of_encodings.end() )
         {
@@ -399,18 +506,38 @@ class charmap_t
       {
       return mapped_character(__gg__low_value_character);
       }
-    int high_value_character()
+    cbl_char_t high_value_character()
       {
-      return mapped_character(__gg__high_value_character);
+      cbl_char_t retval = 0;
+      if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
+        {
+        switch(m_stride)
+          {
+          case 1:
+            retval = DEFAULT_HIGH_VALUE_8;
+            break;
+          case 2:
+            retval = DEFAULT_HIGH_VALUE_16;
+            break;
+          case 4:
+            retval = DEFAULT_HIGH_VALUE_32 ;
+            break;
+          }
+        }
+      else
+        {
+        retval = mapped_character(__gg__high_value_character);
+        }
+      return retval;
       }
 
-    int figconst_character(cbl_figconst_t figconst)
+    cbl_char_t figconst_character(cbl_figconst_t figconst)
       {
-      int const_char = 0;  // Head off a compiler warning
+      cbl_char_t const_char = 0;  // Head off a compiler warning
       switch(figconst)
         {
         case normal_value_e :
-          const_char = -1;
+          abort();
           break;
         case low_value_e    :
           const_char = low_value_character();
@@ -454,9 +581,11 @@ class charmap_t
     return retval;
     }
 
-  int
-  set_digit_negative(int digit, bool is_negative)
+  cbl_char_t
+  set_digit_negative(cbl_char_t digit, bool is_negative)
     {
+    // Returns a 0-9 digit with the internal sign bit altered for ascii or
+    // ebcdic.
     switch(m_numeric_sign_type)
       {
       case sign_type_ascii:
@@ -490,8 +619,302 @@ class charmap_t
     return m_numeric_sign_type == sign_type_ebcdic;
     }
 
-  };
+  bool
+  is_like_utf8() const
+    {
+    return m_is_like_utf8;
+    }
 
-charmap_t *__gg__get_charmap(cbl_encoding_t encoding);
+  void
+  memset(void *dest_, cbl_char_t ch, size_t bytelength)
+    {
+    uint8_t *dest = static_cast<uint8_t *>(dest_);
+    switch(m_stride)
+      {
+      case 1:
+        {
+        if( (ch & 0xFFFFFF00) == 0x00000000 )
+          {
+          // This is the normal case of filling a buffer with a single byte
+          ::memset(dest, ch & 0xff, bytelength);
+          }
+        else
+          {
+          // We are being asked to fill a byte-wide buffer with a multi-byte
+          // character.
+          uint8_t byte3 = ch >> 24;
+          uint8_t byte2 = ch >> 16;
+          uint8_t byte1 = ch >>  8;
+          uint8_t byte0 = ch;
+          size_t fill;
+          size_t i=0;
+          if( byte3 )
+            {
+            fill = bytelength / 4;
+            while( i<fill )
+              {
+              dest[i++] = byte0;
+              dest[i++] = byte1;
+              dest[i++] = byte2;
+              dest[i++] = byte3;
+              }
+            }
+          else if( byte2 )
+            {
+            fill = bytelength / 3;
+            while( i<fill )
+              {
+              dest[i++] = byte0;
+              dest[i++] = byte1;
+              dest[i++] = byte2;
+              }
+            }
+          else
+            {
+            fill = bytelength / 2;
+            while( i<fill )
+              {
+              dest[i++] = byte0;
+              dest[i++] = byte1;
+              }
+            }
+          while( i < bytelength )
+            {
+            dest[i++] = mapped_character(ascii_space);
+            }
+          }
+        break;
+        }
+
+      case 2:
+        {
+        assert( !(bytelength&1) );
+        // We know the target has an even number of bytes available.  We also
+        // know that each codepoint is usually one, but sometimes two, pairs
+        // of bytes
+        uint16_t top_half    = ch>>16;
+        uint16_t bottom_half = ch;
+        size_t fill = bytelength;
+        size_t i = 0;
+        uint16_t *p = PTRCAST(uint16_t, dest);
+        while( i<fill )
+          {
+          p[i/2] = bottom_half;
+          i += 2;
+          if( i>= fill )
+            {
+            break;
+            }
+          if( top_half )
+            {
+            p[i/2] = bottom_half;
+            i += 2;
+            }
+          }
+        if( i < bytelength )
+          {
+          // We were trying to put two-pair values into the destination, but
+          // there were an odd number of pairs available.
+          p[i] = mapped_character(ascii_space);
+          i += 2; // cppcheck-suppress unreadVariable
+          }
+        break;
+        }
+
+      case 4:
+        {
+        assert( !(bytelength&3) );
+        // We know the target has multiple of four bytes available.
+        uint32_t *p = PTRCAST(uint32_t, dest);
+        size_t i = 0;
+        while( i<bytelength )
+          {
+          p[i/4] = ch;
+          i += 4;
+          }
+        break;
+        }
+      }
+    }
+
+  void putch(cbl_char_t ch, void *base_, size_t location)
+    {
+    // This routine puts a character at a byte location.  It's up to the
+    // user to provide the correct byte location, and update it by the stride
+    // when necessary.
+    uint8_t *base = static_cast<uint8_t *>(base_);
+    memcpy(base+location, &ch, m_stride);
+    if( m_stride < 4 )
+      {
+      location += m_stride;
+      ch >>= (8 * m_stride);
+      while(ch)
+        {
+        memcpy(base+location, &ch, m_stride);
+        location += m_stride;
+        ch >>= (8 * m_stride);
+        }
+      }
+    }
+
+  void putch(cbl_char_t ch, void *base_, size_t *location)
+    {
+    // This routine puts a character at a location, and updates the location
+    uint8_t *base = static_cast<uint8_t *>(base_);
+    memcpy(base+*location, &ch, m_stride);
+    *location += m_stride;
+    if( m_stride < 4 )
+      {
+      ch >>= 8 * m_stride;
+      while(ch)
+        {
+        memcpy(base+*location, &ch, m_stride);
+        *location += m_stride;
+        ch >>= 8 * m_stride;
+        }
+      }
+    }
+
+  cbl_char_t getch(const void *base_, size_t location) const
+    {
+    // This routine gets a character at a location, and updates the location
+    cbl_char_t retval = 0;
+    const uint8_t *base = static_cast<const uint8_t *>(base_);
+
+    memcpy(&retval, base+location, m_stride);
+////    location += m_stride;
+////  We need to do something about UTF-8 snd UTF-16
+////    while(ch)
+////      {
+////      memcpy(base+*location, &ch, m_stride);
+////      *location += m_stride;
+////      ch >>= 8 * m_stride;
+////      }
+    return retval;
+    }
+
+  cbl_char_t getch(const void *base_, size_t *location) const
+    {
+    // This routine gets a character at a location, and updates the location
+    cbl_char_t retval = 0;
+    const uint8_t *base = static_cast<const uint8_t *>(base_);
+
+    memcpy(&retval, base+*location, m_stride);
+    *location += m_stride;
+////  We need to do something about UTF-8 snd UTF-16
+////    while(ch)
+////      {
+////      memcpy(base+*location, &ch, m_stride);
+////      *location += m_stride;
+////      ch >>= 8 * m_stride;
+////      }
+    return retval;
+    }
+
+  unsigned long long strtoull(char *in, char **end, int /*base*/)
+    {
+    // This is like strtoull(3), but the base is restricted to 10.
+    size_t index = 0;
+    unsigned long long retval = 0;
+    cbl_char_t mapped_0 = mapped_character(ascii_0);
+    cbl_char_t mapped_9 = mapped_character(ascii_9);
+    for(;;)
+      {
+      cbl_char_t ch = getch(in, &index);
+      if( ch < mapped_0 || ch > mapped_9 )
+        {
+        break;
+        }
+      retval *= 10;
+      retval += ch & 0x0F;
+      }
+    *end = in + index-m_stride ;
+    return retval;
+    }
+
+    template <typename T>
+    size_t
+    Strlen( T *input, ssize_t limit = SSIZE_MAX ) {
+      size_t i;
+      for( i = 0; i < (limit / sizeof(T)) && input[i] != 0; i++ )
+        ;
+      return i;
+    }
+    size_t strlen2( const void *converted, ssize_t limit = SSIZE_MAX ) {
+      switch(m_stride) {
+      case 1:
+        return Strlen( reinterpret_cast<const char*>(converted), limit );
+      case 2:
+        return Strlen( reinterpret_cast<const uint16_t*>(converted), limit );
+      case 4:
+        return Strlen( reinterpret_cast<const uint16_t*>(converted), limit );
+      }
+      //// gcc_unreachable();
+      return -1; // Mollify cppcheck.
+    }
+    
+  size_t
+  strlen( const void *converted,
+          ssize_t limit = SSIZE_MAX)
+    {
+    size_t retval;
+
+    union
+      {
+      const uint8_t  *p8 ;
+      const uint16_t *p16;
+      const uint32_t *p32;
+      } ;
+    const uint8_t *p_start = reinterpret_cast<const uint8_t *>(converted);
+    p8 = p_start;
+    switch(m_stride)
+      {
+      case 1:
+        {
+        // Loop until the pointer is past the limit, or until we hit
+        // a character that is all zeroes
+        while(*p8)
+          {
+          if( p8 - p_start > limit )
+            {
+            break;
+            }
+          p8 += 1;
+          }
+        break;
+        }
+      case 2:
+        {
+        // Loop until the pointer is past the limit, or until we hit
+        // a character that is all zeroes
+        while(*p16)
+          {
+          if( p8 - p_start > limit )
+            {
+            break;
+            }
+          p8 += 2;
+          }
+        break;
+        }
+      case 4:
+        {
+        // Loop until the pointer is past the limit, or until we hit
+        // a character that is all zeroes
+        while(*p32)
+          {
+          if( p8 - p_start > limit )
+            {
+            break;
+            }
+          p8 += 4;
+          }
+        break;
+        }
+      }
+    retval = p8 - p_start;
+    return retval;
+    }
+  };
 
 #endif
index 5c35dc78009645a7b8d36d8b56c2f42261b6a597..f4f4fca1d089c8b8a4dd8f5c5e823edbe9a1bf49 100644 (file)
@@ -60,7 +60,7 @@
 
 /*  COBOL has the concept of Numeric Display values, which use an entire byte
     per digit.  IBM also calls this "Zoned Decimal".
-    
+
     In ASCII, the digits are '0' through '9' (0x30 through 0x39'.  Signed
     values are indicated by turning on the 0x40 bit in either the first
     byte (for LEADING variables) or the last byte (for TRAILING).
@@ -187,49 +187,49 @@ enum cbl_field_type_t {
  *   (But maybe the fill character should just be an explicit character.)
  */
 enum cbl_field_attr_t : uint64_t {
-  none_e            = 0x0000000000,
-  figconst_1_e      = 0x0000000001, // This needs to be 1 - don't change the position
-  figconst_2_e      = 0x0000000002, // This needs to be 2
-  figconst_4_e      = 0x0000000004, // This needs to be 4
-  rjust_e           = 0x0000000008, // justify right
-  ljust_e           = 0x0000000010, // justify left
-  zeros_e           = 0x0000000020, // zero fill
-  signable_e        = 0x0000000040,
-  constant_e        = 0x0000000080, // pre-assigned constant
-  function_e        = 0x0000000100,
-  quoted_e          = 0x0000000200,
-  filler_e          = 0x0000000400,
-  register_e        = 0x0000000800, // Data definition is found in constants.cc
-  intermediate_e    = 0x0000001000, // Compiler-defined temporary variable
-  embiggened_e      = 0x0000002000, // redefined numeric made 64-bit by USAGE POINTER
-  all_alpha_e       = 0x0000004000, // FldAlphanumeric, but all A's
-  all_x_e           = 0x0000008000, // picture is all X's
-  all_ax_e          = 0x000000a000, // picture is all A's or all X's
-  prog_ptr_e        = 0x0000010000, // FUNCTION-POINTER or PROGRAM-POINTER
-  scaled_e          = 0x0000020000,
-  refmod_e          = 0x0000040000, // Runtime; indicates a refmod is active
-  based_e           = 0x0000080000, // pointer capacity, for ADDRESS OF or ALLOCATE
-  any_length_e      = 0x0000100000, // inferred length of linkage in nested program
-  global_e          = 0x0000200000, // field has global scope
-  external_e        = 0x0000400000, // field has external scope
-  blank_zero_e      = 0x0000800000, // BLANK WHEN ZERO
+  none_e            =  0x0000000000,
+  figconst_1_e      =  0x0000000001, // This needs to be 1 - don't change the position
+  figconst_2_e      =  0x0000000002, // This needs to be 2
+  figconst_4_e      =  0x0000000004, // This needs to be 4
+  rjust_e           =  0x0000000008, // justify right
+  ljust_e           =  0x0000000010, // justify left
+  zeros_e           =  0x0000000020, // zero fill
+  signable_e        =  0x0000000040,
+  constant_e        =  0x0000000080, // pre-assigned constant
+  function_e        =  0x0000000100,
+  quoted_e          =  0x0000000200,
+  filler_e          =  0x0000000400,
+  register_e        =  0x0000000800, // Data definition is found in constants.cc
+  intermediate_e    =  0x0000001000, // Compiler-defined temporary variable
+  embiggened_e      =  0x0000002000, // redefined numeric made 64-bit by USAGE POINTER
+  all_alpha_e       =  0x0000004000, // FldAlphanumeric, but all A's
+  all_x_e           =  0x0000008000, // picture is all X's
+  all_ax_e          =  0x000000a000, // picture is all A's or all X's
+  prog_ptr_e        =  0x0000010000, // FUNCTION-POINTER or PROGRAM-POINTER
+  scaled_e          =  0x0000020000,
+  refmod_e          =  0x0000040000, // Runtime; indicates a refmod is active
+  based_e           =  0x0000080000, // pointer capacity, for ADDRESS OF or ALLOCATE
+  any_length_e      =  0x0000100000, // inferred length of linkage in nested program
+  global_e          =  0x0000200000, // field has global scope
+  external_e        =  0x0000400000, // field has external scope
+  blank_zero_e      =  0x0000800000, // BLANK WHEN ZERO
   // data division uses 2 low bits of high byte
-  linkage_e         = 0x0001000000, // field is in linkage section
-  local_e           = 0x0002000000, // field is in local section
-  leading_e         = 0x0004000000, // leading sign (signable_e alone means trailing)
-  separate_e        = 0x0008000000, // separate sign
-  envar_e           = 0x0010000000, // names an environment variable
-  encoded_e         = 0x0020000000, // data.initial matches codeset.encoding
-  bool_encoded_e    = 0x0040000000, // data.initial is a boolean string
-  hex_encoded_e     = 0x0080000000, // data.initial is a hex-encoded string
-  depends_on_e      = 0x0100000000, // A group hierachy contains a DEPENDING_ON
-  initialized_e     = 0x0200000000, // Don't call parser_initialize from parser_symbol_add
-  has_value_e       = 0x0400000000, // Flag to hierarchical descendents to ignore .initial
-  ieeedec_e         = 0x0800000000, // Indicates a FldFloat is IEEE 754 decimal, rather than binary
-  big_endian_e      = 0x1000000000, // Indicates a value is big-endian
-  same_as_e         = 0x2000000000, // Field produced by SAME AS (cannot take new members)
-  record_key_e      = 0x4000000000,
-  typedef_e         = 0x8000000000, // IS TYPEDEF
+  linkage_e         =  0x0001000000, // field is in linkage section
+  local_e           =  0x0002000000, // field is in local section
+  leading_e         =  0x0004000000, // leading sign (signable_e alone means trailing)
+  separate_e        =  0x0008000000, // separate sign
+  envar_e           =  0x0010000000, // names an environment variable
+  encoded_e         =  0x0020000000, // data.initial matches codeset.encoding
+  bool_encoded_e    =  0x0040000000, // data.initial is a boolean string
+  hex_encoded_e     =  0x0080000000, // data.initial is a hex-encoded string
+  depends_on_e      =  0x0100000000, // A group hierachy contains a DEPENDING_ON
+  initialized_e     =  0x0200000000, // Don't call parser_initialize from parser_symbol_add
+  has_value_e       =  0x0400000000, // Flag to hierarchical descendents to ignore .initial
+  ieeedec_e         =  0x0800000000, // Indicates a FldFloat is IEEE 754 decimal, rather than binary
+  big_endian_e      =  0x1000000000, // Indicates a value is big-endian
+  same_as_e         =  0x2000000000, // Field produced by SAME AS (cannot take new members)
+  record_key_e      =  0x4000000000,
+  typedef_e         =  0x8000000000, // IS TYPEDEF
   strongdef_e       = typedef_e + intermediate_e, // STRONG TYPEDEF (not temporary)
 };
 // The separate_e value does double-duty for FldPacked/COMP-6, which is not
@@ -239,7 +239,13 @@ enum cbl_field_attr_t : uint64_t {
 // that there is no sign nybble.
 #define packed_no_sign_e separate_e
 
-enum cbl_figconst_t
+#define LOW_VALUE_E   figconst_1_e
+#define ZERO_VALUE_E  (figconst_2_e|figconst_1_e)
+#define SPACE_VALUE_E figconst_4_e
+#define QUOTE_VALUE_E  (figconst_4_e|figconst_1_e)
+#define HIGH_VALUE_E  (figconst_4_e|figconst_2_e)
+
+enum cbl_figconst_t : uint64_t
     {
     normal_value_e = 0, // This one must be zero
     low_value_e    = 1, // The order is important, because
@@ -431,6 +437,18 @@ enum module_type_t {
   module_toplevel_e,
 };
 
+enum convert_type_t {
+  convert_alpha_e      = 0x01,
+  convert_nat_e        = 0x02,
+  convert_any_e        = 0x03, // i.e., both
+  convert_byte_e       = 0x04,
+  convert_hex_e        = 0x08, // may be combined with alpha or national
+  convert_just_bit_e   = 0x10,
+  convert_just_e       = 0x18, // combined with HEX
+  convert_rjust_bit_e  = 0x20,
+  convert_rjust_e      = 0x38, // combined with JUSTIFY
+};
+
 /*
  * Compare a "raised" EC to an enabled EC or of a declarative.  "raised" may in
  * fact not be raised; in the compiler this function is used to compare a TURN
@@ -490,7 +508,8 @@ struct cbl_declarative_t {
   size_t section; // implies program
   bool global;
   ec_type_t type;
-  uint32_t nfile, files[files_max];
+  size_t nfile;
+  uint64_t files[files_max];
   cbl_file_mode_t mode;
 
   explicit cbl_declarative_t( cbl_file_mode_t mode = file_mode_none_e )
@@ -544,6 +563,7 @@ struct cbl_declarative_t {
   std::vector<uint64_t> encode() const;
 
   /*
+   * Sort file names before file modes, and file modes before non-IO.
    * Sort file names before file modes, and file modes before non-IO.
    */
   bool operator<( const cbl_declarative_t& that ) const {
@@ -566,7 +586,7 @@ struct cbl_declarative_t {
 
     // TRUE if there are no files to match, or the provided file is in the list.
     bool match_file( size_t file ) const {
-    static const uint32_t * pend = files + nfile;
+    static const uint64_t * pend = files + nfile;
 
     return nfile == 0 || pend != std::find(files, files + nfile, file);
   }
index 37bcde305fc3332577fd076f07ed6bcaf7a32d3b..7d92f2d7a0d8ea9c01074a4fc157b4d26807ce28 100644 (file)
@@ -1209,7 +1209,7 @@ valid_encoding( cbl_encoding_t enc ) {
 struct encodings_t {
   bool supported;
   cbl_encoding_t type;
-  const char name[32];
+  char name[32];
 };
 
 #endif
index 14ef069ccb7339ef7e52de825f902e7a793d2e90..064af5bad54045a3acd32361ef1eb2b6a2b4b3cc 100644 (file)
@@ -123,9 +123,10 @@ typedef struct cblc_file_t
     int                  errnum;           // most recent errno; can't reuse "errno" as the name
     file_status_t        io_status;        // See 2014 standard, section 9.1.12
     int                  padding;          // Actually a char
-    int                  delimiter;        // ends a record; defaults to '\n'.
+    uint32_t             delimiter;        // ends a record; defaults to '\n'.
+    int                  stride;           // Width of a character
     int                  flags;            // cblc_file_flags_t
-    int                  recent_char;      // This is the most recent char sent to the file
+    uint32_t             recent_char;      // This is the most recent char sent to the file
     int                  recent_key;
     cblc_file_prior_op_t prior_op;         // run-time type is INT
     cbl_encoding_t       encoding;         // We assume size int
index 7c01f39a8c066fb018a1c46b4baba69a9a9becbe..c6813be6ef1939318dbcb27b783a890e09d52e2f 100644 (file)
@@ -190,55 +190,83 @@ handle_errno(cblc_file_t *file, const char *function, const char *msg)
   }
 
 static
-char *
-get_filename( const cblc_file_t *file)
+void
+establish_filename(       cblc_file_t  *file,
+                    const cblc_field_t *field_of_name,
+                    char               *filename)
   {
-  bool is_quoted = !!(file->flags & file_name_quoted_e);
+  // This routine sets file->filename to the provided name.  The name might
+  // ultimately be filename, which if present has to be in the system encoding
+  // and is flagged as not-quoted, meaning that it could have been from an
+  // environment variable.  It could be from a FldLiteral, which for a SELECT
+  // clause is not encoded in field_of_name->encoding.  Or it could be from a
+  // variable, in which case it is encoded.
 
-  static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
-  static char *fname = static_cast<char *>(malloc(MINIMUM_ALLOCATION_SIZE));
-  massert(fname);
-  if( strlen(file->filename)+1 > fname_size)
-    {
-    fname_size = strlen(file->filename)+1 ;
-    fname = static_cast<char *>(realloc(fname, fname_size));
-    }
+  // Whenever anybody establishes file->filename, it should be done through
+  // a malloc.
 
-  strcpy(fname, file->filename);
+  // The field_of_name was not encoded in any way.  The field_of_name->codeset
+  // might the same as ordinary alphanumerics, but the field->data for literals
+  // was not encoded.
 
-  if( !is_quoted )
+  char *allocated_here = nullptr;
+  if( !filename )
     {
-    // We have been given something that might be the name of an
-    // environment variable that contains the filename:
-    const char *p_from_environment = getenv(fname);
-    if( p_from_environment )
+    // We weren't given a filename, so we extract it from the field_of_name
+
+    file->flags |= file_name_quoted_e;
+    if( field_of_name->type == FldLiteralA )
       {
-      if( strlen(p_from_environment)+1 > fname_size )
+      // For literals in SELECT clauses, the initial value is the
+      // nul-terminated filename in the source-code encoding.  The
+      // field->encoding is possibly wrong, but irrelevant.
+      allocated_here = static_cast<char *>(malloc(field_of_name->capacity+1));
+      massert(allocated_here);
+      memcpy(allocated_here, field_of_name->data, field_of_name->capacity);
+      allocated_here[field_of_name->capacity] = '\0';
+      filename = allocated_here;
+      }
+    else
+      {
+      // We need to convert from the designated encoding to the system
+      // encoding:
+      filename = __gg__iconverter(field_of_name->encoding,
+                                 __gg__console_encoding,
+                                 reinterpret_cast<char *>(field_of_name->data),
+                                 field_of_name->capacity);
+      // COBOL strings are space-filled to the right, so we have to get rid
+      // of any spaces out there.  If somebody *wants* a filename space-filled
+      // to the right, well, at this juncture I am not prepared to be complicit
+      // in that particular flavor of lunacy.
+      size_t n = strlen(filename)-1;
+      // Note the conditional that terminates the loop when n goes from zero
+      // to a huge positive number in the event that the string is all SPACES
+      while( n < strlen(filename) && filename[n] == ascii_space )
         {
-        fname_size = strlen(p_from_environment)+1;
-        free(fname);
-        fname = static_cast<char *>(malloc(fname_size));
-        massert(fname);
+        filename[n--] = '\0';
         }
-      strcpy(fname, p_from_environment);
       }
     }
+  else
+    {
+    file->flags &= ~file_name_quoted_e;
+    }
+
+  // At this point, we have a trimmed filename in the system encoding:
 
-  if(*fname)
+  if( !(file->flags & file_name_quoted_e) )
     {
-    // COBOL strings are space-filled to the right, so we have to get rid
-    // of any spaces out there.  If somebody *wants* a filename space-filled
-    // to the right, well, at this juncture I am not prepared to be complicit
-    // in that particular flavor of lunacy.
-    size_t n = strlen(fname)-1;
-    // Note the conditional that terminates the loop when n goes from zero
-    // to a huge positive number in the event that the string is all SPACES
-    while( n < strlen(fname) && fname[n] == ascii_space )
+    // We have been given something that might be the name of an
+    // environment variable that contains the filename:
+    char *p_from_environment = getenv(filename);
+    if( p_from_environment )
       {
-      fname[n--] = '\0';
+      filename = p_from_environment;
       }
     }
-  return fname;
+  free(file->filename);
+  file->filename = strdup(filename);
+  free(allocated_here);
   }
 
 static void
@@ -308,7 +336,7 @@ void
 __gg__file_init(
   cblc_file_t   *file,
   const char    *name,
-  size_t         symbol_table_index,
+  uint64_t       symbol_table_index,
   cblc_field_t **keys,
   int           *key_numbers,
   int           *uniques,
@@ -353,6 +381,7 @@ __gg__file_init(
     file->errnum              = 0 ;
     file->io_status           = FsSuccess ;
     file->delimiter           = charmap->mapped_character(ascii_newline) ;
+    file->stride              = charmap->stride();
     file->flags               = file_flag_none_e;
         file->flags          |= (optional ? file_flag_optional_e : file_flag_none_e)
                                 + file_flag_initialized_e;
@@ -666,7 +695,7 @@ relative_file_delete(cblc_file_t *file, bool is_random)
   file->errnum = 0;
   file->io_status = FsErrno;
 
-  char record_marker;
+  cbl_char_t record_marker;
 
   unsigned char *stash = static_cast<unsigned char *>(malloc(file->default_record->capacity));
   massert(stash);
@@ -731,6 +760,7 @@ relative_file_delete(cblc_file_t *file, bool is_random)
 
     errno = 0;
     file->errnum = 0;
+    record_marker = 0;
     ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
     if( presult < 0 )
       {
@@ -1153,45 +1183,55 @@ __io__file_delete(cblc_file_t *file, bool is_random)
   }
 
 static void
-__io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
+trim_in_place(char *psz)
   {
-  // filename is the result of a strdup or malloc.  Because both FILE OPEN
-  // and FILE DELETE can establish or change a name, we free it here and
-  // replace it.  The same is true in FILE DELETE Format 2
-  free(file->filename);
-  file->filename = filename;
-  file->flags &= ~file_name_quoted_e;
-  file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
-  int erc;
-
-  // This code copied from reopen
-  const char *trimmed_name = get_filename(file);
-  if( !trimmed_name[0] )
+  // Get rid of leading spaces:
+  if( *psz == ascii_space )
     {
-    bool all_spaces = true;
-    for(size_t i=0; i<strlen(file->filename); i++)
+    char *p = psz;
+    while(*p++ == ascii_space)
       {
-      if( file->filename[i] != ascii_space )
-        {
-        all_spaces = false;
-        }
-      break;
+      // Just trim them away:
       }
-    if( all_spaces )
+    p -= 1;
+    size_t i=0;
+    while(*p)
       {
-      warnx("Warning: %s specified with a filename that is all spaces",
-            file->name);
-      file->io_status = FsNameError;    // "31"
-      goto done;
+      psz[i++] = *p++;
       }
+    psz[i++] = '\0';
+    }
+  // Get rid of trailing spaces:
+  size_t len = strlen(psz);
+  size_t i   = len-1;
+  while( i < len && psz[i] == ascii_space )
+    {
+    psz[i--] = '\0';
+    }
+  }
 
-    warnx(  "%s(): There is no environment variable named \"%s\"\n",
-            __func__,
-            file->filename);
-    file->io_status = FsNoFile;    // "35"
+static void
+__io__file_remove(cblc_file_t  *file,
+            const char *filename)
+  {
+  file->errnum = 0;
+  file->io_status = FsErrno;
+  int erc;
+
+  if( filename )
+    {
+    free(file->filename);
+    file->filename = strdup(filename);
+    trim_in_place(file->filename);
+    }
+
+  if( !strlen(file->filename) )
+    {
+    warnx("Warning: %s specified with a filename that is empty",
+          file->name);
+    file->io_status = FsNameError;    // "31"
     goto done;
     }
-  // trimmed_name is now the file system name of the file to be removed.
 
   // If the file is open, we flag that with "41"
   if( file->file_pointer )
@@ -1202,7 +1242,7 @@ __io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
 
   // There's been a lot of buildup.  We can now try to remove the file:
   errno = 0;
-  erc = remove(trimmed_name);
+  erc = remove(file->filename);
   if( erc == 0 )
     {
     // All is copacetic.  There was a file, and now it's gone.
@@ -1210,13 +1250,13 @@ __io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
     }
   else if( errno == ENOENT )
     {
-    // The file didn't exist.  
+    // The file didn't exist.
     file->io_status = FsUnavail;    // "05"
     }
   else
     {
     // We have some other kind of error.  Lack of credentials, or whatever.
-    file->io_status = FsErrno;    // 
+    file->io_status = FsErrno;    //
     goto done;
     }
 
@@ -1504,7 +1544,8 @@ relative_file_start(cblc_file_t *file,
   while(      rfp.record_position >= 0
           &&  rfp.record_position+total_record_length <= rfp.file_size )
     {
-    char record_marker;
+    cbl_char_t record_marker;
+    record_marker = 0;
     ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
     if( presult < 0 )
       {
@@ -1952,7 +1993,7 @@ relative_file_rewrite( cblc_file_t *file, size_t length, bool is_random )
     {
     // This is like a write, except the place we are putting
     // it has to be occupied instead of empty.
-    char record_marker;
+    cbl_char_t record_marker;
     if( relative_file_parameters_get(   rfp,
                                         rfm_microfocus_e,
                                         file,
@@ -1963,6 +2004,7 @@ relative_file_rewrite( cblc_file_t *file, size_t length, bool is_random )
       goto done;
       }
 
+    record_marker = 0;
     ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
     if( presult < 0 )
       {
@@ -2513,7 +2555,8 @@ relative_file_write(cblc_file_t    *file,
         }
       }
     // Let's check to make sure the slot for this record is currently available:
-    char record_marker;
+    cbl_char_t record_marker;
+    record_marker = 0;
     ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
     if( presult < 0 )
       {
@@ -2599,9 +2642,11 @@ sequential_file_write(cblc_file_t    *file,
   {
   // This code handles SEQUENTIAL and LINE SEQUENTIAl
   charmap_t *charmap = __gg__get_charmap(file->encoding);
+  int stride = charmap->stride();
 
-  char ch = '\0';
-  size_t characters_to_write;
+  // ch is the vertical control character
+  cbl_char_t ch = '\0';
+  size_t bytes_to_write;
 
   int lcount;
 
@@ -2628,13 +2673,13 @@ sequential_file_write(cblc_file_t    *file,
     }
 
   // By default, we write out the number of characters in the record area
-  characters_to_write = length;
+  bytes_to_write = length;
 
   // That gets overridden if there is a record_length
   if( file->record_length )
     {
     int rdigits;
-    characters_to_write = (int)__gg__binary_value_from_field(
+    bytes_to_write = stride * (int)__gg__binary_value_from_field(
                                                       &rdigits,
                                                       file->record_length);
     }
@@ -2642,20 +2687,23 @@ sequential_file_write(cblc_file_t    *file,
   if( file->org == file_line_sequential_e )
     {
     // If file-sequential, then trailing spaces are removed:
-    while(     characters_to_write > 0
-               && location[characters_to_write-1] == charmap->mapped_character(ascii_space) )
+    while(bytes_to_write > 0
+           && charmap->getch(location, bytes_to_write-stride) 
+                                  == charmap->mapped_character(ascii_space) )
       {
-      characters_to_write -= 1;
+      bytes_to_write -= stride;
       }
     }
 
-  if( after && file->org == file_line_sequential_e && ch == charmap->mapped_character(ascii_newline) )
+  if( after && file->org == file_line_sequential_e 
+                           && ch == charmap->mapped_character(ascii_newline) )
     {
     // In general, we terminate every line with a newline.  Because this
     // line is supposed to start with a newline, we decrement the line
     // counter by one if we had already sent one.
-    if( lcount && (   file->recent_char == charmap->mapped_character(ascii_newline)
-                     || file->recent_char == charmap->mapped_character(ascii_ff)) )
+    if( lcount && 
+            (   file->recent_char == charmap->mapped_character(ascii_newline)
+                || file->recent_char == charmap->mapped_character(ascii_ff)) )
       {
       lcount -= 1;
       }
@@ -2665,7 +2713,10 @@ sequential_file_write(cblc_file_t    *file,
     {
     while(lcount--)
       {
-      fputc(ch, file->file_pointer);
+      fwrite( &ch,
+              stride,
+              1,
+              file->file_pointer);
       if( handle_ferror(file, __func__, "fputc() error [3]") )
         {
         goto done;
@@ -2679,10 +2730,10 @@ sequential_file_write(cblc_file_t    *file,
   switch(file->org)
     {
     case file_line_sequential_e:
-      if( characters_to_write )
+      if( bytes_to_write )
         {
         fwrite( location,
-                characters_to_write,
+                bytes_to_write,
                 1,
                 file->file_pointer);
         if( handle_ferror(file, __func__, "fwrite() error") )
@@ -2694,13 +2745,13 @@ sequential_file_write(cblc_file_t    *file,
       break;
 
     case file_sequential_e:
-      if( characters_to_write )
+      if( bytes_to_write )
         {
         // File sequential records can start off with a four-byte
         // preamble.
 
-        if(    characters_to_write < file->record_area_min
-               || characters_to_write > file->record_area_max)
+        if(    bytes_to_write < file->record_area_min
+               || bytes_to_write > file->record_area_max)
           {
           file->io_status = FsBoundWrite; // "44"
           goto done;
@@ -2709,11 +2760,11 @@ sequential_file_write(cblc_file_t    *file,
         if( file->record_area_min != file->record_area_max )
           {
           // Because of the min/max mismatch, we require a preamble:
-          // The first two bytes are the big-endian character count
+          // The first two bytes are the big-endian byte count
           const unsigned char preamble[4] =
             {
-            (unsigned char)(characters_to_write>>8),
-            (unsigned char)(characters_to_write),
+            (unsigned char)(bytes_to_write>>8),
+            (unsigned char)(bytes_to_write),
             0,
             0
             };
@@ -2729,7 +2780,7 @@ sequential_file_write(cblc_file_t    *file,
           }
 
         fwrite( location,
-                characters_to_write,
+                bytes_to_write,
                 1,
                 file->file_pointer);
         if( handle_ferror(file, __func__, "fwrite() error") )
@@ -2753,7 +2804,10 @@ sequential_file_write(cblc_file_t    *file,
     {
     // Special case:  when AFTER NON-ZERO lines, we stick a newline on the
     // end of this record:
-    fputc(ch, file->file_pointer);
+    fwrite( &ch,
+            stride,
+            1,
+            file->file_pointer);
     if( handle_ferror(file, __func__, "fputc() error [4]") )
       {
       goto done;
@@ -2766,7 +2820,10 @@ sequential_file_write(cblc_file_t    *file,
     // We did the output BEFORE, so now it's time to send some newlines
     while(lcount--)
       {
-      fputc(ch, file->file_pointer);
+      fwrite( &ch,
+              stride,
+              1,
+              file->file_pointer);
       if( handle_ferror(file, __func__, "fputc() error [5]") )
         {
         goto done;
@@ -3044,8 +3101,7 @@ line_sequential_file_read(  cblc_file_t *file)
   {
   file->errnum = 0;
   file->io_status = FsErrno;
-  size_t characters_read = 0;
-  size_t remaining;
+  size_t bytes_read = 0;
   bool hit_eof;
 
   // According to IBM:
@@ -3065,8 +3121,11 @@ line_sequential_file_read(  cblc_file_t *file)
   // characters to the right as undefined.  I'm going with IBM,
   // it makes more sense to me.
 
+  charmap_t *charmap = __gg__get_charmap(file->encoding);
+  int stride = charmap->stride();
+
   // We first stage the data into the record area.
-  int ch;
+  cbl_char_t ch;
 
   long fpos = ftell(file->file_pointer);
   if( handle_ferror(file, __func__, "ftell() error") )
@@ -3076,15 +3135,16 @@ line_sequential_file_read(  cblc_file_t *file)
     }
 
   hit_eof = false;
-  while( characters_read < file->record_area_max )
+  while( bytes_read < file->record_area_max )
     {
-    ch = fgetc(file->file_pointer);
+    ch = 0;
+    fread(&ch, 1, stride, file->file_pointer);
     file->errnum = ferror(file->file_pointer);
     if( ch == file->delimiter )
       {
       break;
       }
-    if( ch == EOF )
+    if( feof(file->file_pointer) )
       {
       hit_eof = true;
       clearerr(file->file_pointer);
@@ -3095,19 +3155,15 @@ line_sequential_file_read(  cblc_file_t *file)
       fpos = -1;
       goto done;
       }
-    file->default_record->data[characters_read] = (char)ch;
-    characters_read += 1;
-    }
-  remaining = characters_read;
-  while(remaining < file->record_area_max )
-    {
-    // Space fill shorty records
-    charmap_t *charmap = __gg__get_charmap(file->encoding);
-    file->default_record->data[remaining++] =
-                                        charmap->mapped_character(ascii_space);
+    memcpy(file->default_record->data+bytes_read, &ch, stride);
+    bytes_read += stride;
     }
+  // Space fill shorty records
+    charmap->memset(file->default_record->data+bytes_read,
+                    charmap->mapped_character(ascii_space),
+                    file->record_area_max  - bytes_read);
 
-  if( hit_eof && !characters_read)
+  if( hit_eof && !bytes_read)
     {
     // We got an end-of-file without characters
     file->io_status = FsEofSeq; // "10"
@@ -3120,7 +3176,7 @@ line_sequential_file_read(  cblc_file_t *file)
     // does another READ:
     file->io_status = FsErrno;
     }
-  else if (characters_read < file->record_area_max)
+  else if (bytes_read < file->record_area_max )
     {
     // Just discard an early record delimiter
     file->io_status = FsRecordLength;   // "04"
@@ -3133,18 +3189,6 @@ line_sequential_file_read(  cblc_file_t *file)
     // Manual: "If the first unread character is the record delimiter, it
     // is discarded. Otherwise, the first unread character becomes the first
     // character read by the next READ statement."
-    ch = fgetc(file->file_pointer);
-    file->errnum = ferror();
-    // If that next character isn't a delimiter, put it back:
-    if( ch != file->delimiter && ch != EOF)
-      {
-      ungetc(ch, file->file_pointer);
-      }
-    else if( handle_ferror(file->file_pointer, __func__, "fgetc() error") )
-      {
-      fpos = -1;
-      goto done;
-      }
 #else
     // In this code, extra characters before the newline
     // are read next time are discarded.  GnuCOBOL works this way, and
@@ -3153,11 +3197,12 @@ line_sequential_file_read(  cblc_file_t *file)
     // SEQUENTIAL; it describes only SEQUENTIAL.
     for(;;)
       {
-      ch = fgetc(file->file_pointer);
+      ch = 0;
+      fread(&ch, 1, stride, file->file_pointer);
       file->errnum = ferror(file->file_pointer);
       // We can't use handle_ferror() directly, because an EOF is
       // a legitimate way to end the last line.
-      if( ch == file->delimiter || ch == EOF)
+      if( ch == file->delimiter || feof(file->file_pointer) )
         {
         clearerr(file->file_pointer);
         break;
@@ -3176,7 +3221,7 @@ line_sequential_file_read(  cblc_file_t *file)
   if( file->record_length )
     {
     __gg__int128_to_field(file->record_length,
-                                    characters_read,
+                                    bytes_read/stride,
                                     0,
                                     truncation_e,
                                     NULL);
@@ -3266,14 +3311,27 @@ sequential_file_read(  cblc_file_t  *file)
   if( characters_read < bytes_in_record )
     {
     charmap_t *charmap = __gg__get_charmap(file->encoding);
-    memset( file->default_record->data,
-            charmap->mapped_character(ascii_space),
-            bytes_to_read);
+    charmap->memset(file->default_record->data,
+                    charmap->mapped_character(ascii_space),
+                    bytes_to_read);
     file->io_status = FsEofSeq; // "10"
     fpos = -1;
     goto done;
     }
 
+  if( characters_read < file->default_record->capacity )
+    {
+    // The record area is longer than the characters we read.  Space-fill out
+    // to the end:
+
+    charmap_t *charmap = __gg__get_charmap(file->encoding);
+    charmap->memset( file->default_record->data + characters_read,
+                     charmap->mapped_character(ascii_space),
+                     file->default_record->capacity - characters_read );
+    }
+
+
+
   // Let the caller know if we got too few or too many characters
   if(     bytes_in_record < file->record_area_min
           ||  bytes_in_record > file->record_area_max )
@@ -3292,7 +3350,7 @@ sequential_file_read(  cblc_file_t  *file)
   if( file->record_length )
     {
     __gg__int128_to_field(file->record_length,
-                                    characters_read,
+                                    characters_read/file->stride,
                                     0,
                                     truncation_e,
                                     NULL);
@@ -3570,7 +3628,8 @@ relative_file_read( cblc_file_t *file,
       file->prior_read_location = -1;
       goto done;
       }
-    char record_marker;
+    cbl_char_t record_marker;
+    record_marker = 0;
     if( pread(rfp.fd, &record_marker, 1, rfp.flag_position) <= 0)
       {
       goto done;
@@ -4189,9 +4248,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
 
   // Stash the mode_char for later analysis during READ and WRITE operations
   file->mode_char = mode_char;
-  char *trimmed_name;
-  trimmed_name = get_filename(file);
-  if( !trimmed_name[0] )
+  if( !file->filename[0] )
     {
     bool all_spaces = true;
     for(size_t i=0; i<strlen(file->filename); i++)
@@ -4220,7 +4277,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
   // achMode is the mode string that gets passed down below to fopen().
   random_access_mode = (    file->access == file_access_rnd_e
                               || file->access == file_access_dyn_e);
-  the_file_exists = access(trimmed_name, F_OK) == 0;
+  the_file_exists = access(file->filename, F_OK) == 0;
   file->flags |= the_file_exists ? file_flag_existed_e : file_flag_none_e ;
 
   // We have four operations: INPUT (r) OUTPUT (w) I-O (+) and EXTEND (a)
@@ -4362,7 +4419,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
       }
     }
 
-  file->file_pointer = fopen(trimmed_name, achMode);
+  file->file_pointer = fopen(file->filename, achMode);
   if( file->file_pointer == NULL )
     {
     file->errnum = errno;
@@ -4406,21 +4463,27 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
 
 static void
 __io__file_open(cblc_file_t *file,
-                char *filename,
-                int mode_char,
-                int is_quoted)
+          const char *filename,
+                int mode_char)
   {
-  // 'filename' is a pointer to a malloc() buffer.
-  // The 'filename' has to be in the system encoding, typically ASCII
-
-  // The complication:  A filename can be literal text, it can be from a COBOL
-  // alphanumeric variable, or it can be the name of an environment variable
-  // that contains the actual name of the file.  The consequence is that if
-  // you want to call __gg__file_open from anywhere except the parser_file_open
-  // routine, then you had best really know what you are doing.
-
   file->errnum = 0;
   file->io_status = FsErrno;
+
+  if( filename )
+    {
+    free(file->filename);
+    file->filename = strdup(filename);
+    trim_in_place(file->filename);
+    }
+
+  if( !strlen(file->filename) )
+    {
+    warnx("Warning: %s specified with a filename that is empty",
+          file->name);
+    file->io_status = FsNameError;    // "31"
+    goto done;
+    }
+
   if( file->file_pointer )
     {
     // The file is already open:
@@ -4428,16 +4491,9 @@ __io__file_open(cblc_file_t *file,
     }
   else
     {
-    // filename is the result of a strdup or malloc.  Because both FILE OPEN
-    // and FILE DELETE can establish or change a name, we free it here and
-    // replace it.  The same is true in FILE DELETE Format 2
-    free(file->filename);
-    file->filename = filename;
-    file->flags &= ~file_name_quoted_e;
-    file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
-
     __gg__file_reopen(file, mode_char);
     }
+  done:
   file->prior_op = file_op_open;
   establish_status(file, -1);
   }
@@ -4491,10 +4547,6 @@ __io__file_close( cblc_file_t *file, int how )
     file_indexed_close(file);
     }
 
-  // The filename was malloced.  So, we get rid of it here.
-  free(file->filename);
-  file->filename = NULL;
-
   done:
   file->prior_op = file_op_close;
   establish_status(file, fpos);
@@ -4548,9 +4600,8 @@ public:
   static const char constexpr marquee[64] = "libgcobol: gfileio.cc";
 
   typedef void (open_t)( cblc_file_t *file,
-                         char *filename,
-                         int mode_char,
-                         int is_quoted );
+                   const char *filename,
+                         int mode_char);
   typedef void (close_t)( cblc_file_t *file,
                           int how );
   typedef void (start_t)( cblc_file_t *file,
@@ -4565,14 +4616,12 @@ public:
                           int after,
                           int lines,
                           int is_random );
-  typedef void (rewrite_t)( cblc_file_t *file,
-                            size_t length, bool is_random );
-  typedef void (delete_t)( cblc_file_t *file,
+  typedef void (rewrite_t)(cblc_file_t *file,
+                           size_t length, bool is_random );
+  typedef void (delete_t)(cblc_file_t *file,
                           bool is_random );
-  typedef void (remove_t)( cblc_file_t *file,
-                          char *filename,
-                          int is_quoted);
-
+  typedef void (remove_t)(cblc_file_t  *file,
+                    const char         *filename);
   open_t      *Open;
   close_t     *Close;
   start_t     *Start;
@@ -4651,9 +4700,9 @@ gcobol_fileops() {
  * Then, in libgcobol, replace direct calls with calls through fileops.
  * That is, instead of
  *
- *     __gg__file_open("foo", "r", false );
+ *     __gg__file_open("foo", "r" );
  * use
- *     gfile->Open("foo", "r", false );
+ *     gfile->Open("foo", "r" );
  *
  * You'll probably want some kind of trampoline to avoid the need to
  * generate the Gimple to call through a pointer to a structure:
@@ -4680,13 +4729,13 @@ gcobol_io_funcs() {
 extern "C"
 void
 __gg__file_open(cblc_file_t *file,
+          const cblc_field_t *field_of_name,
                 char *filename,
-                int mode_char,
-                int is_quoted)
+                int mode_char)
   {
-  // The 'filename' has to be in the system encoding, typically ASCII
+  establish_filename(file, field_of_name, filename);
   gcobol_io_t *functions = gcobol_io_funcs();
-  functions->Open(file, filename, mode_char, is_quoted);
+  functions->Open(file, NULL, mode_char);
   }
 
 extern "C"
@@ -4749,11 +4798,14 @@ __gg__file_delete(cblc_file_t *file, bool is_random)
 extern "C"
 
 void
-__gg__file_remove(cblc_file_t *file, char *name, int is_quoted)
+__gg__file_remove(      cblc_file_t *file,
+                  const cblc_field_t *field_of_name,
+                        char *filename)
   {
     // DELETE FILE Format 2 - removes a file.
+    establish_filename(file, field_of_name, filename);
     gcobol_io_t *functions = gcobol_io_funcs();
-    functions->Remove(file, name, is_quoted);
+    functions->Remove(file, NULL);
   }
 
 /* end interface functions */
index d525500dffecfb2ca61c2b00e84c405285419718..36df81bb2eab6e65fefab146e8581dd62395270b 100644 (file)
@@ -59,9 +59,9 @@ extern "C"
 void __gg__handle_error(const char *function, const char *msg);
 
 void __gg__file_open(   cblc_file_t *file,
+                  const cblc_field_t *fname,
                         char *filename,
-                        int mode_char,
-                        int is_quoted);
+                        int mode_char);
 
 void __gg__file_reopen(cblc_file_t *file, int mode_char);
 
index 8a9880ba2139df9b56cbcc5e38c2400d7f9728e2..be98f7eb522c9ba7be3b7c21f24a5bcb60baca81 100644 (file)
@@ -1097,7 +1097,7 @@ __gg__subtractf1_float_phase2(cbl_arith_format_t ,
   const size_t       *C_s = __gg__treeplet_3s;
 
   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
-  // This is the assignment phase of an ADD Format 2
+  // This is the assignment phase of an SUBTRACT Format 2
   // We take phase1_result and subtract it from C
 
   GCOB_FP128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
@@ -1135,11 +1135,10 @@ __gg__subtractf2_float_phase1(cbl_arith_format_t ,
                           compute_error
                           );
 
-  // Subtract that from the B value:
+  // Subtract that subtotal from the B value:
   GCOB_FP128 value_b = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
 
-  // The two numbers have the same number of rdigits.  It's now safe to add
-  // them.
+
   phase1_result_float = subtraction_helper_float(value_b, phase1_result_float, compute_error);
   }
 
@@ -1366,7 +1365,7 @@ __gg__multiplyf1_phase2(cbl_arith_format_t ,
     {
     if( C[0]->type == FldFloat )
       {
-      // gixed * float
+      // fixed * float
       a_value = (GCOB_FP128) multiply_intermediate_int128;
       if( multiply_intermediate_rdigits )
         {
index bf9396ea0bbe18b7825f272aa63549509b9c37cc..af8f3ac42c0e35e75cbfbd577da0208cce4a2122 100644 (file)
@@ -38,6 +38,7 @@
 #include <langinfo.h>
 
 #include <cctype>
+#include <cwctype>
 #include <cmath>
 #include <cstring>
 #include <ctime>
@@ -100,11 +101,17 @@ trim_trailing_spaces(PCHAR left, PCHAR &right, int mapped_space)
 static bool
 is_zulu_format(PCHAR left, PCHAR &right, charmap_t *charmap)
   {
-  int char_Z = charmap->mapped_character(ascii_Z);
+  cbl_char_t char_Z = charmap->mapped_character(ascii_Z);
+  cbl_char_t char_z = charmap->mapped_character(ascii_z);
+  int stride = charmap->stride();
   bool retval = false;
-  if( right > left )
+  if( right - left >= stride)
     {
-    retval = std::toupper((unsigned char)*(right-1)) == char_Z;
+    cbl_char_t last_character = charmap->getch(right-stride, size_t(0));
+    if( last_character == char_Z || last_character == char_z )
+      {
+      retval = true;
+      }
     }
   return retval;
   }
@@ -228,11 +235,11 @@ static
 void
 string_to_dest(cblc_field_t *dest, const char *psz)
   {
+  charmap_t *charmap = __gg__get_charmap(dest->encoding);
   size_t dest_length = dest->capacity;
-  size_t source_length = strlen(psz);
+  size_t source_length = charmap->strlen(psz);
   size_t length = std::min(dest_length, source_length);
-  charmap_t *charmap = __gg__get_charmap(dest->encoding);
-  memset(dest->data, charmap->mapped_character(ascii_space), dest_length);
+  charmap->memset(dest->data, charmap->mapped_character(ascii_space), dest_length);
   memcpy(dest->data, psz, length);
   }
 
@@ -574,6 +581,7 @@ get_all_time( const cblc_field_t *dest, // needed for the target encoding
           ctm.day_of_week+1,
           ctm.day_of_year,
           ctm.ZZZZ);
+
   __gg__convert_encoding(PTRCAST(char, stime),
                          DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
@@ -801,7 +809,7 @@ ftime_replace(char *dest,
               char const * const dest_end,
               char const *       source,
               char const * const source_end,
-              charmap_t  *       charmap_source,
+              charmap_t  *       charmap,
               char const * const ftime)
   {
   // This routine is highly dependent on the source format being correct.
@@ -824,33 +832,38 @@ ftime_replace(char *dest,
   static const int OFFSET_TO_DOY            = 34;
   static const int OFFSET_TO_ZZZZ           = 37;
 
-  unsigned int decimal_point =
-                   charmap_source->mapped_character(__gg__get_decimal_point());
-  unsigned int source_Y    = charmap_source->mapped_character(ascii_Y   );
-  unsigned int source_W    = charmap_source->mapped_character(ascii_W   );
-  unsigned int source_s    = charmap_source->mapped_character(ascii_s   );
-  unsigned int source_m    = charmap_source->mapped_character(ascii_m   );
-  unsigned int source_h    = charmap_source->mapped_character(ascii_h   );
-  unsigned int source_plus = charmap_source->mapped_character(ascii_plus);
-  unsigned int source_D    = charmap_source->mapped_character(ascii_D   );
-  unsigned int source_M    = charmap_source->mapped_character(ascii_M   );
+  int stride = charmap->stride();
+
+  cbl_char_t decimal_point =
+                   charmap->mapped_character(__gg__get_decimal_point());
+  cbl_char_t source_Y    = charmap->mapped_character(ascii_Y   );
+  cbl_char_t source_W    = charmap->mapped_character(ascii_W   );
+  cbl_char_t source_s    = charmap->mapped_character(ascii_s   );
+  cbl_char_t source_m    = charmap->mapped_character(ascii_m   );
+  cbl_char_t source_h    = charmap->mapped_character(ascii_h   );
+  cbl_char_t source_plus = charmap->mapped_character(ascii_plus);
+  cbl_char_t source_D    = charmap->mapped_character(ascii_D   );
+  cbl_char_t source_M    = charmap->mapped_character(ascii_M   );
 
   while( source < source_end && dest < dest_end )
     {
-    unsigned char fchar = *source;
+    cbl_char_t fchar = charmap->getch(source, size_t(0));
     if( fchar == source_Y )
       {
       // This can only be a YYYY
       // But, we have a choice.  If there is a 'W' in the format, then we
       // need to use ZZZZ rather than YYYY:
-      src = ftime + OFFSET_TO_YYYY;
+      src = ftime + OFFSET_TO_YYYY*stride;
       const char *p = source;
+      size_t index = 0;
       while(p < source_end)
         {
-        if( (unsigned char)*p++ == source_W )
+        //if( (unsigned char)*p++ == source_W )
+        if( charmap->getch(source, &index) == source_W )
           {
-          src = ftime + OFFSET_TO_ZZZZ;
+          src = ftime + OFFSET_TO_ZZZZ*stride;
           }
+        p += stride;
         }
 
       ncount = 4;
@@ -859,43 +872,43 @@ ftime_replace(char *dest,
       {
       // This can only be a MM
       ncount = 2;
-      src = ftime + OFFSET_TO_MM;
+      src = ftime + OFFSET_TO_MM*stride;
       }
     else if( fchar == source_D )
       {
       // It can be a D, DD or DDD
-      if( (unsigned char)source[2] == source_D )
+      if( charmap->getch(source, 2*stride) == source_D )
         {
         ncount = 3;
-        src = ftime + OFFSET_TO_DOY;
+        src = ftime + OFFSET_TO_DOY*stride;
         }
-      else if( (unsigned char)source[1] == source_D )
+      else if( charmap->getch(source, 1*stride) == source_D )
         {
         ncount = 2;
-        src = ftime + OFFSET_TO_DD;
+        src = ftime + OFFSET_TO_DD*stride;
         }
       else
         {
         ncount = 1;
-        src = ftime + OFFSET_TO_DOW;
+        src = ftime + OFFSET_TO_DOW*stride;
         }
       }
     else if( fchar == source_plus )
       {
       saw_plus_sign = true;
       ncount = 1;
-      src = ftime + OFFSET_TO_OFFSET;
+      src = ftime + OFFSET_TO_OFFSET*stride;
       }
     else if( fchar == source_h )
       {
       ncount = 2;
       if(saw_plus_sign)
         {
-        src = ftime + OFFSET_TO_OFFSET_HOUR;
+        src = ftime + OFFSET_TO_OFFSET_HOUR*stride;
         }
       else
         {
-        src = ftime + OFFSET_TO_HOUR;
+        src = ftime + OFFSET_TO_HOUR*stride;
         }
       }
     else if( fchar == source_m )
@@ -903,11 +916,11 @@ ftime_replace(char *dest,
       ncount = 2;
       if(saw_plus_sign)
         {
-        src = ftime + OFFSET_TO_OFFSET_MINUTE;
+        src = ftime + OFFSET_TO_OFFSET_MINUTE*stride;
         }
       else
         {
-        src = ftime + OFFSET_TO_MINUTE;
+        src = ftime + OFFSET_TO_MINUTE*stride;
         }
       }
     else if( fchar == decimal_point )
@@ -922,18 +935,18 @@ ftime_replace(char *dest,
         {
         // There can be a variable number of fractional 's'
         ncount = -1;
-        src = ftime + OFFSET_TO_FRACTION;
+        src = ftime + OFFSET_TO_FRACTION*stride;
         }
       else
         {
         ncount = 2;
-        src = ftime + OFFSET_TO_SECOND;
+        src = ftime + OFFSET_TO_SECOND*stride;
         }
       }
     else if( fchar == source_W )
       {
       ncount = 3;
-      src = ftime + OFFSET_TO_WEEK;
+      src = ftime + OFFSET_TO_WEEK*stride;
       }
     else
       {
@@ -946,18 +959,22 @@ ftime_replace(char *dest,
       {
       // This indicates special processing for a variable number of 's'
       // characters
-      while((unsigned char)*source == source_s && dest < dest_end)
+      while(charmap->getch(source, size_t(0)) == source_s && dest < dest_end)
         {
-        source += 1;
-        *dest++ = *src++;
+        source += stride;
+        memcpy(dest, src, stride);
+        dest += stride;
+        src  += stride;
         }
       }
     else
       {
-      source += ncount;
+      source += ncount*stride;
       while(ncount-- && dest < dest_end)
         {
-        *dest++ = *src++;
+        memcpy(dest, src, stride);
+        dest += stride;
+        src += stride;
         }
       }
     }
@@ -1177,26 +1194,31 @@ __gg__char( cblc_field_t *dest,
     }
 
   // We need to convert the ch character to the destination encoding.
-  const char achFrom[2] = {static_cast<char>(ch), '\0'};
+  // THIS IS A KLUDGE UNTIL WE MAKE THE CURRENT_COLLATION TO BE A MAP OF
+  // WIDE CHARACTERS!
+  charmap_t *charmap_dest = __gg__get_charmap(dest->encoding);
+
+  cbl_char_t achFrom = 0;
+  memcpy(&achFrom, &ch, 1);
   size_t charsout;
   const char *converted = __gg__iconverter(__gg__display_encoding,
                                            dest->encoding,
-                                           achFrom,
+                                           &achFrom,
                                            1,
                                            &charsout );
   // Pick up our character, because mapped_character() might clobber
   // the converted contents.
-  int converted_char = *converted; // cppcheck-suppress variableScope
+  int converted_char = 0;
+  memcpy(&converted_char, converted, charmap_dest->stride());
   // Space fill the dest:
-  charmap_t *charmap_dest = __gg__get_charmap(dest->encoding);
-  memset(dest->data,
-         charmap_dest->mapped_character(ascii_space),
-         dest->capacity);
+  charmap_dest-> memset(dest->data,
+                        charmap_dest->mapped_character(ascii_space),
+                        dest->capacity);
   // Make the first character of the destination equal to our converted
   // character:
   if( ch > -1 && charsout == 1 )
     {
-    dest->data[0] = converted_char;
+    charmap_dest->putch(converted_char, dest->data, size_t(0));
     }
   }
 
@@ -1278,10 +1300,16 @@ __gg__current_date(cblc_field_t *dest)
 
   char retval[DATE_STRING_BUFFER_SIZE];
   timespec_to_string(retval, tp);
-  __gg__convert_encoding(PTRCAST(char, retval),
-                         DEFAULT_SOURCE_ENCODING,
-                         dest->encoding);
-  string_to_dest(dest, retval);
+
+  size_t bytes_converted;
+  char *converted = __gg__miconverter(__gg__console_encoding,
+                                      dest->encoding,
+                                      retval,
+                                      strlen(retval),
+                                      &bytes_converted);
+  __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+  __gg__adjust_dest_size(dest, bytes_converted);
+  free(converted);
   }
 
 extern "C"
@@ -1524,29 +1552,30 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string
   charmap_t *charmap_from = __gg__get_charmap(from);
   charmap_t *charmap_to   = __gg__get_charmap(to);
 
-  int dest_space = charmap_to->mapped_character(ascii_space);
-  int format_Z   = charmap_from->mapped_character(ascii_Z);
+  cbl_char_t dest_space = charmap_to->mapped_character(ascii_space);
+  cbl_char_t format_Z   = charmap_from->mapped_character(ascii_Z);
+  cbl_char_t format_z   = charmap_from->mapped_character(ascii_z);
 
   // Establish the destination, and set it to spaces
   char *d    = PTRCAST(char, dest->data);
   const char *dend = d + dest->capacity;
-  memset(d, dest_space, dest->capacity);
+  charmap_to->memset(d, dest_space, dest->capacity);
 
   // Establish the formatting string:
   const char *format     = PTRCAST(char, (input->data+input_offset));
   const char *format_end = format + input_size;
 
   bool is_zulu = false;
-
   const char *p = format;
   while( p < format_end )
     {
-    int ch = *p++;
-    if( ch == format_Z )
+    cbl_char_t ch = charmap_from->getch(p, size_t(0));
+    if( ch == format_Z || ch == format_z)
       {
       is_zulu = true;
       break;
       }
+    p += charmap_from->stride();
     }
 
   struct cbl_timespec ts = {};
@@ -1574,9 +1603,10 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string
   // Convert seconds west of UTC to minutes east of UTC
   ctm.tz_offset = -timezone/60;
 
-  char achftime[64];
+  char achftime[256];
   get_all_time(dest, achftime, ctm);
   ftime_replace(d, dend, format, format_end, charmap_from, achftime);
+  return;
   }
 
 extern "C"
@@ -1586,22 +1616,23 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string
                      size_t arg1_offset,
                      size_t arg1_size,
                const cblc_field_t *arg2, // integer date
-                     size_t arg2_offset,
+                     size_t arg2_offset,  
                      size_t arg2_size)
   {
   // FUNCTION FORMATTED-DATE
 
-  cbl_encoding_t from = arg1->encoding;
   cbl_encoding_t to   = dest->encoding;
-  charmap_t *charmap_from = __gg__get_charmap(from);
+  cbl_encoding_t from = arg1->encoding;
+
   charmap_t *charmap_to   = __gg__get_charmap(to);
+  charmap_t *charmap_from = __gg__get_charmap(from);
 
-  int dest_space = charmap_to->mapped_character(ascii_space);
+  cbl_char_t dest_space = charmap_to->mapped_character(ascii_space);
 
   // Establish the destination, and set it to spaces
   char *d    = PTRCAST(char, dest->data);
   const char *dend = d + dest->capacity;
-  memset(d, dest_space, dest->capacity);
+  charmap_to->memset(d, dest_space, dest->capacity);
 
   // Establish the formatting string:
   char *format     = PTRCAST(char, (arg1->data+arg1_offset));
@@ -1611,11 +1642,11 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string
 
   populate_ctm_from_date(ctm, arg2, arg2_offset, arg2_size);
 
-  char achftime[64];
+  char achftime[256];
   get_all_time(dest, achftime, ctm);
   if( __gg__exception_code )
     {
-    memset(d, dest_space, dend-d);
+     charmap_to->memset(d, dest_space, dend-d);
     }
   else
     {
@@ -1671,7 +1702,7 @@ __gg__formatted_datetime( cblc_field_t *dest, // Destination string
     convert_to_zulu(ctm);
     }
 
-  char achftime[64];
+  char achftime[256];
   get_all_time(dest, achftime, ctm);
   if( __gg__exception_code )
     {
@@ -1710,7 +1741,7 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string
   // Establish the destination, and set it to spaces
   char *d          = PTRCAST(char, dest->data);
   const char *dend = d + dest->capacity;
-  memset(d, dest_space, dest->capacity);
+  charmap_to->memset(d, dest_space, dest->capacity);
 
   // Establish the formatting string:
   char *format     = PTRCAST(char, (par1->data+par1_o));
@@ -1734,11 +1765,11 @@ __gg__formatted_time( cblc_field_t *dest,// Destination string
     convert_to_zulu(ctm);
     }
 
-  char achftime[64];
+  char achftime[256];
   get_all_time(dest, achftime, ctm);
   if( __gg__exception_code )
     {
-    memset(d, dest_space, dend-d);
+    charmap_to->memset(d, dest_space, dend-d);
     }
   else
     {
@@ -2066,6 +2097,54 @@ __gg__max(cblc_field_t *dest,
     }
   }
 
+static void
+change_case( cblc_field_t *dest,
+       const cblc_field_t *input,
+             size_t        input_offset,
+             size_t        input_size,
+             std::wint_t (changer)( std::wint_t ch )
+             )
+  {
+  cbl_encoding_t enc_to   = dest->encoding;
+  cbl_encoding_t enc_from = input->encoding;
+  cbl_encoding_t enc_work = DEFAULT_32_ENCODING;
+
+  // In order to handle any input encoding, we convert to UTF32:
+  size_t converted_bytes;
+  const char *converted = __gg__iconverter(enc_from,
+                                           enc_work,
+                                           input->data+input_offset,
+                                           input_size,
+                                           &converted_bytes);
+  // Make a copy of it to prevent the static nature of iconverter from causing
+  // trouble:
+  cbl_char_t *duped = 
+          static_cast<cbl_char_t *>(__gg__memdup(converted, converted_bytes));
+  cbl_char_t *pend = duped + converted_bytes / width_of_utf32;
+
+  // Use the designated case changer:
+  std::transform(duped, pend, duped,
+                 [&changer](cbl_char_t c) { return changer(c); });
+
+  // Convert that modified string to the destination encoding:
+  converted = __gg__iconverter(enc_work,
+                               enc_to,
+                               duped,
+                               converted_bytes,
+                               &converted_bytes);
+  free(duped);
+
+  char *duped2 = static_cast<char *>(__gg__memdup(converted, converted_bytes));
+  __gg__field_from_string(dest,
+                          0,
+                          dest->capacity,
+                          duped2,
+                          converted_bytes);
+  free(duped2);
+  __gg__adjust_dest_size(dest, converted_bytes);
+  }
+
+
 extern "C"
 void
 __gg__lower_case( cblc_field_t *dest,
@@ -2073,27 +2152,17 @@ __gg__lower_case( cblc_field_t *dest,
                   size_t        input_offset,
                   size_t        input_size)
   {
-  cbl_encoding_t from = input->encoding;
-  cbl_encoding_t to   = dest->encoding;
-  charmap_t *charmap_dest = __gg__get_charmap(to);
+  return change_case(dest, input, input_offset, input_size, std::towlower);
+  }
 
-  size_t dest_length = dest->capacity;
-  size_t source_length = input_size;
-  size_t length = std::min(dest_length, source_length);
-  memset( dest->data,
-          charmap_dest->mapped_character(ascii_space),
-          dest_length);
-  memcpy(dest->data, input->data+input_offset, length);
-  __gg__convert_encoding_length(PTRCAST(char, dest->data),
-                                length,
-                                from,
-                                DEFAULT_SOURCE_ENCODING);
-  std::transform(dest->data, dest->data + dest_length, dest->data,
-                 [](unsigned char c) { return std::tolower(c); });
-  __gg__convert_encoding_length(PTRCAST(char, dest->data),
-                                length,
-                                DEFAULT_SOURCE_ENCODING,
-                                to);
+extern "C"
+void
+__gg__upper_case( cblc_field_t *dest,
+            const cblc_field_t *input,
+                  size_t        input_offset,
+                  size_t        input_size)
+  {
+  return change_case(dest, input, input_offset, input_size, std::towupper);
   }
 
 extern "C"
@@ -2401,7 +2470,7 @@ numval( cblc_field_t *dest,
                                    PTRCAST(char, input->data + input_offset),
                                    input_size,
                                    &nbytes);
-  const char *pend = p + input_size;
+  const char *pend = p + nbytes;
 
   int errpos = 0;
   __int128 retval = 0;
@@ -2702,7 +2771,7 @@ numval_c( cblc_field_t *dest,
                                   &nbytes);
   char *pstart = strdup(converted);
   massert(pstart);
-  char *pend   = pstart + src_size;
+  char *pend   = pstart + nbytes;
   char *p      = pstart;
 
   GCOB_FP128 retval = 0;
@@ -3447,7 +3516,8 @@ __gg__trim( cblc_field_t *dest,
   cbl_encoding_t from = arg1->encoding;
   cbl_encoding_t to   = dest->encoding;
   charmap_t *charmap = __gg__get_charmap(to);
-  int mapped_space = charmap->mapped_character(ascii_space);
+  int stride = charmap->stride();
+  cbl_char_t mapped_space = charmap->mapped_character(ascii_space);
 
   int rdigits;
   __int128 type = __gg__binary_value_from_qualified_field(&rdigits,
@@ -3466,6 +3536,8 @@ __gg__trim( cblc_field_t *dest,
             "be an intermediate alphanumeric\n");
     abort();
     }
+
+  // What is this all about?
   dest->capacity = dest->offset;
 
   // Make a copy of the input:
@@ -3476,27 +3548,29 @@ __gg__trim( cblc_field_t *dest,
   // Convert it to the destination encoding
   __gg__convert_encoding_length(copy, arg1_size, from, to);
 
-
   // No matter what, we want to find the leftmost non-space and the
   // rightmost non-space:
 
   char *left  = copy;
-  char *right = left + arg1_size-1;
+  char *right = left + arg1_size-stride;
 
   // Find left and right: the first and last non-spaces
   while( left <= right )
     {
-    if( *left != mapped_space && *right != mapped_space )
+    cbl_char_t cleft  = charmap->getch(left,  (size_t)0);
+    cbl_char_t cright = charmap->getch(right, (size_t)0);
+
+    if( cleft != mapped_space && cright != mapped_space )
       {
       break;
       }
-    if( *left == mapped_space )
+    if( cleft == mapped_space )
       {
-      left += 1;
+      left += stride;
       }
-    if( *right == mapped_space )
+    if( cright == mapped_space )
       {
-      right -= 1;
+      right -= stride;
       }
     }
   if( type == LEADING )
@@ -3517,24 +3591,13 @@ __gg__trim( cblc_field_t *dest,
     // When the arg1 input string was empty, we want left to be right+1.
     // The left/right loop can sometimes end up with left equal to right+2.
     // That needs to be fixed:
-    left = right+1;
+    left = right+stride;
     }
 
-  size_t ncount = right+1 - left;
+  size_t ncount = right+stride - left;
   __gg__adjust_dest_size(dest, ncount);
 
-  char *dest_left  = PTRCAST(char, dest->data);
-  char *dest_right = dest_left + dest->capacity - 1;
-  const char *dest_end   = dest_left + dest->capacity;
-
-  while( dest_left <= dest_right && left <= right )
-    {
-    *dest_left++ = *left++;
-    }
-  while(dest_left < dest_end)
-    {
-    *dest_left++ = mapped_space;
-    }
+  memmove(dest->data, left, ncount);
   }
 
 #if HAVE_INITSTATE_R && HAVE_SRANDOM_R && HAVE_RANDOM_R
@@ -3632,34 +3695,29 @@ __gg__reverse(cblc_field_t *dest,
   cbl_encoding_t from = input->encoding;
   cbl_encoding_t to   = dest->encoding;
 
-  size_t dest_length = dest->capacity;
-  size_t source_length = input_size;
-  size_t length = std::min(dest_length, source_length);
+  charmap_t *charmap = __gg__get_charmap(to);
+  size_t stride = charmap->stride();
 
-  // Make a copy of the input
-  char *copy = static_cast<char *>(malloc(length));
-  massert(copy);
-  memcpy(copy, input->data+input_offset, length);
+  size_t dest_length = dest->capacity;
 
   // Convert the input to the destination encoding
-  __gg__convert_encoding_length(copy,
-                                length,
-                                from,
-                                to);
-
-  // Set the destination to all spaces
-  charmap_t *charmap = __gg__get_charmap(to);
-  memset(dest->data, charmap->mapped_character(ascii_space), dest_length);
-  for(size_t i=0; i<length; i++)
-    {
-    dest->data[i] = copy[source_length-1-i];
-    }
-  if( (dest->attr & intermediate_e) )
-    {
-    dest->capacity = std::min(dest_length, source_length);
-    }
-
-  free(copy);
+  size_t bytes_converted;
+  const char *converted = __gg__iconverter(from,
+                                           to,
+                                           input->data+input_offset,
+                                           input_size,
+                                           &bytes_converted);
+  // copy over characters from the end of the copy to the beginning of dest:
+  size_t i_from = bytes_converted - stride;
+  size_t i_to = 0;
+  while( i_from < bytes_converted && i_to < dest_length )
+    {
+    cbl_char_t ch = charmap->getch(converted, i_from);
+    charmap->putch(ch, dest->data+dest->offset, i_to);
+    i_from -= stride;
+    i_to   += stride;
+    }
+  __gg__adjust_dest_size(dest, i_to);
   }
 
 extern "C"
@@ -3892,36 +3950,6 @@ __gg__test_day_yyyyddd( cblc_field_t *dest,
                         NULL);
   }
 
-extern "C"
-void
-__gg__upper_case( cblc_field_t *dest,
-            const cblc_field_t *input,
-                  size_t        input_offset,
-                  size_t        input_size)
-  {
-  cbl_encoding_t from = input->encoding;
-  cbl_encoding_t to   = dest->encoding;
-  charmap_t *charmap_dest = __gg__get_charmap(to);
-
-  size_t dest_length = dest->capacity;
-  size_t source_length = input_size;
-  size_t length = std::min(dest_length, source_length);
-  memset( dest->data,
-          charmap_dest->mapped_character(ascii_space),
-          dest_length);
-  memcpy(dest->data, input->data+input_offset, length);
-  __gg__convert_encoding_length(PTRCAST(char, dest->data),
-                                length,
-                                from,
-                                DEFAULT_SOURCE_ENCODING);
-  std::transform(dest->data, dest->data + dest_length, dest->data,
-                 [](unsigned char c) { return std::toupper(c); });
-  __gg__convert_encoding_length(PTRCAST(char, dest->data),
-                                length,
-                                DEFAULT_SOURCE_ENCODING,
-                                to);
-  }
-
 extern "C"
 void
 __gg__variance( cblc_field_t *dest,
@@ -4552,43 +4580,32 @@ fill_cobol_tm(cobol_tm &ctm,
               size_t par2_offset,
               size_t par2_size)
   {
-  // Establish the formatting string:
-  char *format     = PTRCAST(char, (par1->data+par1_offset));
-  char *format_end = format + par1_size;
-
-  // Establish the string to be checked:
-  char *source     = PTRCAST(char, (par2->data+par2_offset));
-  char *source_end = source + par2_size;
-
-  charmap_t *charmap_format = __gg__get_charmap(par1->encoding);
-  charmap_t *charmap_checked = __gg__get_charmap(par2->encoding);
-  int checked_space = charmap_checked->mapped_character(ascii_space);
-  int source_plus  = charmap_checked->mapped_character(ascii_plus);
-  int source_minus = charmap_checked->mapped_character(ascii_minus);
-  int source_zero  = charmap_checked->mapped_character(ascii_zero);
-
-  int format_space  = charmap_format->mapped_character(ascii_space);
-  int format_T      = charmap_format->mapped_character(ascii_T      );
-  int format_colon  = charmap_format->mapped_character(ascii_colon  );
-  int format_plus   = charmap_format->mapped_character(ascii_plus   );
-  int format_minus  = charmap_format->mapped_character(ascii_minus  );
-  int format_W      = charmap_format->mapped_character(ascii_W      );
-  int format_Z      = charmap_format->mapped_character(ascii_Z      );
-  int format_z      = charmap_format->mapped_character(ascii_z      );
-  int format_s      = charmap_format->mapped_character(ascii_s      );
-  int format_m      = charmap_format->mapped_character(ascii_m      );
-  int format_h      = charmap_format->mapped_character(ascii_h      );
-  int format_w      = charmap_format->mapped_character(ascii_w      );
-  int format_Y      = charmap_format->mapped_character(ascii_Y      );
-  int format_M      = charmap_format->mapped_character(ascii_M      );
-  int format_D      = charmap_format->mapped_character(ascii_D      );
-  int format_zero   = charmap_format->mapped_character(ascii_zero   );
-  char decimal_point
-                = charmap_format->mapped_character(__gg__get_decimal_point());
+  // It turns out to be just easier to convert the strings to ASCII space to
+  // do the conversion of par2 against the format in par1:
+  charmap_t *charmap = __gg__get_charmap(DEFAULT_SOURCE_ENCODING);
+
+  size_t bytes_converted;
+  char *par1_c = __gg__miconverter(par1->encoding,
+                                           DEFAULT_SOURCE_ENCODING,
+                                           par1->data+par1_offset,
+                                           par1_size,
+                                           &bytes_converted);
+  char *format = par1_c;
+  char *format_end = format + bytes_converted;
+
+  char *par2_c = __gg__miconverter(par2->encoding,
+                                           DEFAULT_SOURCE_ENCODING,
+                                           par2->data+par2_offset,
+                                           par2_size,
+                                           &bytes_converted);
+  char *source = par2_c;
+  char *source_end = source + bytes_converted;
+
+  char decimal_point = __gg__get_decimal_point();
 
   // Let's eliminate trailing spaces...
-  trim_trailing_spaces(format, format_end, format_space);
-  trim_trailing_spaces(source, source_end, checked_space);
+  trim_trailing_spaces(format, format_end, ascii_space);
+  trim_trailing_spaces(source, source_end, ascii_space);
 
   bool in_offset = false;
   bool in_nanoseconds = false;
@@ -4605,10 +4622,10 @@ fill_cobol_tm(cobol_tm &ctm,
     {
     unsigned char ch = *format;
 
-    if(    ch == format_T
-           || ch == format_colon
-           || ch == format_minus
-           || ch == format_W)
+    if(    ch == ascii_T
+           || ch == ascii_colon
+           || ch == ascii_minus
+           || ch == ascii_W)
       {
       // These are just formatting characters.  They need to be duplicated,
       // but are otherwise ignored.
@@ -4620,34 +4637,34 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_plus )
+    if( ch == ascii_plus )
       {
       // This flags a following hhmm offset.  It needs to match a '+' or '-'
-      if(    (unsigned char)*source != source_plus
-          && (unsigned char)*source != source_minus
-          && (unsigned char)*source != source_zero)
+      if(    (unsigned char)*source != ascii_plus
+          && (unsigned char)*source != ascii_minus
+          && (unsigned char)*source != ascii_zero)
         {
         break;
         }
-      if( (unsigned char)*source == format_zero )
+      if( (unsigned char)*source == ascii_zero )
         {
         // The next four characters have to be zeroes
-        if( (unsigned char)source[1] != format_zero )
+        if( (unsigned char)source[1] != ascii_zero )
           {
           retval += 1;
           break;
           }
-        if( (unsigned char)source[2] != format_zero )
+        if( (unsigned char)source[2] != ascii_zero )
           {
           retval += 2;
           break;
           }
-        if( (unsigned char)source[3] != format_zero )
+        if( (unsigned char)source[3] != ascii_zero )
           {
           retval += 3;
           break;
           }
-        if( (unsigned char)source[4] != format_zero )
+        if( (unsigned char)source[4] != ascii_zero )
           {
           retval += 4;
           break;
@@ -4671,9 +4688,9 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_Y )
+    if( ch == ascii_Y )
       {
-      errpos = gets_year(source, source_end, charmap_checked, ctm);
+      errpos = gets_year(source, source_end, charmap, ctm);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4683,9 +4700,9 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_M )
+    if( ch == ascii_M )
       {
-      errpos = gets_month(source, source_end, charmap_checked, ctm);
+      errpos = gets_month(source, source_end, charmap, ctm);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4695,13 +4712,13 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_D )
+    if( ch == ascii_D )
       {
       // We have three possibilities: DDD, DD, and D
-      if( (unsigned char)format[1] != format_D )
+      if( (unsigned char)format[1] != ascii_D )
         {
         // A singleton 'D' is a day-of-week
-        errpos = gets_day_of_week(source, source_end, charmap_checked, ctm);
+        errpos = gets_day_of_week(source, source_end, charmap, ctm);
         if( errpos > 0)
           {
           retval += errpos - 1;
@@ -4709,10 +4726,10 @@ fill_cobol_tm(cobol_tm &ctm,
           }
         bump = 1;
         }
-      else if( (unsigned char)format[2] != format_D )
+      else if( (unsigned char)format[2] != ascii_D )
         {
         // This is DD, for day-of-month
-        errpos = gets_day(source, source_end, charmap_checked, ctm);
+        errpos = gets_day(source, source_end, charmap, ctm);
         if( errpos > 0)
           {
           retval += errpos - 1;
@@ -4724,7 +4741,7 @@ fill_cobol_tm(cobol_tm &ctm,
         {
         // Arriving here means that it is DDD, for day-of-year
         // This is DD, for day-of-month
-        errpos = gets_day_of_year(source, source_end, charmap_checked, ctm);
+        errpos = gets_day_of_year(source, source_end, charmap, ctm);
         if( errpos > 0)
           {
           retval += errpos - 1;
@@ -4735,9 +4752,9 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_w )
+    if( ch == ascii_w )
       {
-      errpos = gets_week(source, source_end, charmap_checked, ctm);
+      errpos = gets_week(source, source_end, charmap, ctm);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4747,9 +4764,9 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_h )
+    if( ch == ascii_h )
       {
-      errpos = gets_hours(source, source_end, charmap_checked, ctm, in_offset);
+      errpos = gets_hours(source, source_end, charmap, ctm, in_offset);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4759,9 +4776,9 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_m )
+    if( ch == ascii_m )
       {
-      errpos = gets_minutes(source, source_end, charmap_checked, ctm, in_offset);
+      errpos = gets_minutes(source, source_end, charmap, ctm, in_offset);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4771,9 +4788,9 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_s && !in_nanoseconds )
+    if( ch == ascii_s && !in_nanoseconds )
       {
-      errpos = gets_seconds(source, source_end, charmap_checked, ctm);
+      errpos = gets_seconds(source, source_end, charmap, ctm);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4783,7 +4800,7 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_s && in_nanoseconds )
+    if( ch == ascii_s && in_nanoseconds )
       {
       // Peel off digits to the right of the decimal point one at a time
       errpos = gets_nanoseconds(format,
@@ -4791,8 +4808,8 @@ fill_cobol_tm(cobol_tm &ctm,
                                 source,
                                 source_end,
                                 ctm,
-                                charmap_format,
-                                charmap_checked);
+                                charmap,
+                                charmap);
       if( errpos > 0 )
         {
         retval += errpos - 1;
@@ -4802,11 +4819,11 @@ fill_cobol_tm(cobol_tm &ctm,
       goto proceed;
       }
 
-    if( ch == format_Z || ch == format_z )
+    if( ch == ascii_Z || ch == ascii_z )
       {
       // This has to be the end of the road
-      if(    (unsigned char)source[0] != format_Z
-          && (unsigned char)source[0] != format_z )
+      if(    (unsigned char)source[0] != ascii_Z
+          && (unsigned char)source[0] != ascii_z )
         {
         retval += 0;
         break;
@@ -4829,9 +4846,10 @@ proceed:
     {
     // This means we processed the entire format string without seeing an error
     retval = 0;
-
     // Otherwise, either the format or source was too short
     }
+  free(par1_c);
+  free(par2_c);
   return retval;
   }
 
@@ -4914,7 +4932,8 @@ __gg__seconds_from_formatted_time(cblc_field_t *dest,
     }
   else
     {
-    retval = (double)(ctm.hh * 3600 + ctm.mm * 60 + ctm.ss) + ctm.nanoseconds/1000000000.;
+    retval = (double)(ctm.hh * 3600 + ctm.mm * 60 + ctm.ss)
+                      + ctm.nanoseconds/1000000000.;
     }
   __gg__double_to_target( dest,
                           retval,
@@ -4928,16 +4947,34 @@ __gg__hex_of(cblc_field_t *dest,
              size_t field_offset,
              size_t field_size)
   {
-  charmap_t *charmap = __gg__get_charmap(dest->encoding);
+  // We are going to build the hex string up here, in ascii, and convert to the
+  // the destination encoding at the end.
+
   static const char hex[17] = "0123456789ABCDEF";
-  size_t bytes = field_size;
-  __gg__adjust_dest_size(dest, 2*bytes);
-  for(size_t i=0; i<bytes; i++)
+
+  // Dest size is two hex characters per input byte.
+  size_t build_size = 2 * field_size;
+
+  // Build up the hex string in ascii:
+  char *build = static_cast<char *>(malloc(build_size));
+  massert(build);
+  for(size_t i=0; i<field_size; i++)
     {
     unsigned char byte = (field->data+field_offset)[i];
-    dest->data[2*i  ] = charmap->mapped_character(hex[byte>>4]);
-    dest->data[2*i+1] = charmap->mapped_character(hex[byte&0xF]);
+    build[2*i  ] = hex[byte>>4];
+    build[2*i+1] = hex[byte&0xF];
     }
+  // Convert the hex string to the destination encoding:
+  size_t converted_bytes;
+  const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                           dest->encoding,
+                                           build,
+                                           build_size,
+                                           &converted_bytes);
+  // And put it into place:
+  __gg__adjust_dest_size(dest, converted_bytes);
+  memcpy(dest->data, converted, converted_bytes);
+  free(build);
   }
 
 extern "C"
@@ -5249,11 +5286,11 @@ __gg__numval_f( cblc_field_t *dest,
                                   &nbytes);
   GCOB_FP128 value = 0;
   const char *data     = converted;
-  const char *data_end = data + source_size;
+  const char *data_end = data + nbytes;
 
   int error = floating_format_tester( data,
                                       data_end);
-  if( error || source_size >= 256 )
+  if( error || nbytes >= 256 )
     {
     exception_raise(ec_argument_function_e);
     }
@@ -5296,7 +5333,7 @@ __gg__test_numval_f(cblc_field_t *dest,
                                   &nbytes);
 
   const char *data     = converted;
-  const char *data_end = data + source_size;
+  const char *data_end = data + nbytes;
   int error = floating_format_tester( data,
                                       data_end);
   __gg__int128_to_field(dest,
@@ -5711,11 +5748,15 @@ __gg__locale_date(cblc_field_t *dest,
     strcpy(ach, nl_langinfo(D_FMT));
     strftime(ach, sizeof(ach), nl_langinfo(D_FMT), &tm);
     }
-  __gg__convert_encoding(ach,
-                         DEFAULT_SOURCE_ENCODING,
-                         dest->encoding);
-  memcpy(dest->data, ach, strlen(ach));
-  __gg__adjust_dest_size(dest, strlen(ach));
+  size_t bytes_converted;
+  char *converted = __gg__miconverter(__gg__console_encoding,
+                                      dest->encoding,
+                                      ach,
+                                      strlen(ach),
+                                      &bytes_converted);
+  __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+  __gg__adjust_dest_size(dest, bytes_converted);
+  free(converted);
   }
 
 extern "C"
@@ -5749,11 +5790,15 @@ __gg__locale_time(cblc_field_t *dest,
     strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
     }
 
-  __gg__convert_encoding(ach,
-                         DEFAULT_SOURCE_ENCODING,
-                         dest->encoding);
-  memcpy(dest->data, ach, strlen(ach));
-  __gg__adjust_dest_size(dest, strlen(ach));
+  size_t bytes_converted;
+  char *converted = __gg__miconverter(__gg__console_encoding,
+                                      dest->encoding,
+                                      ach,
+                                      strlen(ach),
+                                      &bytes_converted);
+  __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+  __gg__adjust_dest_size(dest, bytes_converted);
+  free(converted);
   }
 
 extern "C"
@@ -5788,10 +5833,13 @@ __gg__locale_time_from_seconds( cblc_field_t *dest,
     tm.tm_sec    = seconds % 100;
     strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
     }
-
-  __gg__convert_encoding(ach,
-                         DEFAULT_SOURCE_ENCODING,
-                         dest->encoding);
-  memcpy(dest->data, ach, strlen(ach));
-  __gg__adjust_dest_size(dest, strlen(ach));
+  size_t bytes_converted;
+  char *converted = __gg__miconverter(__gg__console_encoding,
+                                      dest->encoding,
+                                      ach,
+                                      strlen(ach),
+                                      &bytes_converted);
+  __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+  __gg__adjust_dest_size(dest, bytes_converted);
+  free(converted);
   }
index 20fc9751d8a110a7de73d7b1432c935a259bb3a9..7a4bde67da0ed6b851a12c34f07610d1fbb4ce11 100644 (file)
@@ -38,6 +38,7 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include <cwctype>
 
 #include <dirent.h>
 #include <dlfcn.h>
 #include "exceptl.h"
 #include "stringbin.h"
 
+#define NO_RDIGITS (0)
+
+// Forward reference:
+extern "C"
+int
+__gg__move( cblc_field_t        *fdest,
+            size_t               dest_offset,
+            size_t               dest_size,
+            cblc_field_t        *fsource,
+            size_t               source_offset,
+            size_t               source_size,
+            int                  source_flags,
+            cbl_round_t          rounded );
+
 
 /* BSD extension.  */
 #if !defined(LOG_PERROR)
@@ -140,7 +155,6 @@ int         __gg__rdigits                     = 0    ;
 int         __gg__nop                         = 0    ;
 int         __gg__main_called                 = 0    ;
 void       *__gg__entry_label                 = NULL ;
-cbl_encoding_t __gg__console_encoding         = no_encoding_e    ;
 
 // During SORT operations, we don't want the end-of-file condition, which
 // happens as a matter of course, from setting the EOF exception condition.
@@ -234,6 +248,9 @@ void       *__gg__entry_location = NULL;
 // nested PERFORM PROC statements.
 void       *__gg__exit_address = NULL;
 
+// This is the encoding used for sorting tables and files
+static cbl_encoding_t encoding_for_sort;
+
 /*
  * ec_status_t represents the runtime exception condition status for
  * any statement.  There are 4 states:
@@ -466,6 +483,8 @@ struct program_state
   cbl_encoding_t rt_display_encoding;
   cbl_encoding_t rt_national_encoding;
   char *rt_program_name;
+  cbl_char_t rt_working_init;
+  cbl_char_t rt_local_init;
 
   program_state() : rt_currency_signs(256)
     {
@@ -483,6 +502,8 @@ struct program_state
     rt_quote_character      = ascii_dquote  ;    // Change this with APOST
     rt_low_value_character  = DEGENERATE_LOW_VALUE ;
     rt_high_value_character = DEGENERATE_HIGH_VALUE ;
+    rt_working_init         = NOT_A_CHARACTER       ;
+    rt_local_init           = NOT_A_CHARACTER       ;
 
     // Set all the currency_sign pointers to NULL:
 
@@ -507,6 +528,8 @@ struct program_state
     rt_national_encoding    = ps.rt_national_encoding     ;
     rt_collation            = ps.rt_collation             ;
     rt_program_name         = ps.rt_program_name          ;
+    rt_working_init         = ps.rt_working_init          ;
+    rt_local_init           = ps.rt_local_init            ;
     }
   };
 
@@ -675,32 +698,9 @@ __gg__pop_program_state()
   __gg__display_encoding     = program_states.back().rt_display_encoding     ;
   __gg__national_encoding    = program_states.back().rt_national_encoding    ;
   __gg__currency_signs       = program_states.back().rt_currency_signs       ;
-  }
-
-static
-int
-cstrncmp(   char const * const left_,
-            char const * const right_,
-            size_t count)
-  {
-  const char *left  = left_;
-  const char *right = right_;
-  // This is the version of strncmp() that uses the current collation
+  __gg__working_init         = program_states.back().rt_working_init         ;
+  __gg__local_init           = program_states.back().rt_local_init           ;
 
-  // It also is designed to handle strings with embedded NUL characters, so
-  // it treats NULs like any other characters.
-  int retval = 0;
-  while( count-- )
-    {
-    unsigned char chl = *left++;
-    unsigned char chr = *right++;
-    retval = chl - chr;
-    if( retval )
-      {
-      break;
-      }
-    }
-  return retval;
   }
 
 extern "C"
@@ -713,480 +713,766 @@ __gg__decimal_point_is_comma()
   __gg__decimal_separator = ascii_period ;
   }
 
-extern "C"
-void
-__gg__init_program_state(cbl_encoding_t display_encoding,
-                         cbl_encoding_t national_encoding)
+static __int128
+edited_to_binary( const cblc_field_t *field,
+                  char *ps_,
+                  int length,
+                  int *rdigits)
   {
-  // This routine gets called at DATA DIVISION time.
+  charmap_t *charmap = __gg__get_charmap(field->encoding);
 
-  __gg__display_encoding  = display_encoding;
-  __gg__national_encoding = national_encoding;
+  const unsigned char *ps = const_cast<const unsigned char *>(PTRCAST(unsigned char, ps_));
+  // This routine is used for converting NumericEdited strings to
+  // binary.
 
-  // We need to make sure that the program_states vector has at least one
-  // entry in it.  This happens when we are the very first PROGRAM-ID called
-  // in this module.
-  if( program_states.empty() )
-    {
-    initialize_program_state();
-    }
-  }
+  // Numeric edited strings can have all kinds of crap in them: spaces,
+  // slashes, dollar signs...you name it.  It might have a minus sign at
+  // the beginning or end, or it might have CR or DB at the end.
 
-static int
-var_is_refmod( const cblc_field_t *var )
-  {
-  return (var->attr & refmod_e) != 0;
-  }
+  // We are going to look for a minus sign, D (or d) and use that to flag the
+  // result as negative.  We are going to look for a decimal point and count up
+  // the numerical digits to the right of it.  And we are going to pretend
+  // that nothing else matters.
 
-extern "C"
-__int128
-__gg__power_of_ten(int n)
-  {
-  // 2** 64 = 1.8E19
-  // 2**128 = 3.4E38
-  __int128 retval = 1;
-  static const int MAX_POWER = 19 ;
-  static const __int128 pos[MAX_POWER+1] =
-    {
-    1ULL,                       // 00
-    10ULL,                      // 01
-    100ULL,                     // 02
-    1000ULL,                    // 03
-    10000ULL,                   // 04
-    100000ULL,                  // 05
-    1000000ULL,                 // 06
-    10000000ULL,                // 07
-    100000000ULL,               // 08
-    1000000000ULL,              // 09
-    10000000000ULL,             // 10
-    100000000000ULL,            // 11
-    1000000000000ULL,           // 12
-    10000000000000ULL,          // 13
-    100000000000000ULL,         // 14
-    1000000000000000ULL,        // 15
-    10000000000000000ULL,       // 16
-    100000000000000000ULL,      // 17
-    1000000000000000000ULL,     // 18
-    10000000000000000000ULL,    // 19
-    };
-  if( n < 0 || n>MAX_POWER*2)     // The most we can handle is 10**38
-    {
-    fprintf(stderr,
-            "Trying to raise 10 to %d as an int128, which we can't do.\n",
-            n);
-    fprintf(stderr, "The problem is in %s %s:%d.\n", __func__, __FILE__, __LINE__);
-    abort();
-    }
-  if( n <= MAX_POWER )
+  int hyphen = 0;
+  *rdigits = 0;
+
+  // index into the ps string
+  int index = 0;
+
+  // Create a delta_r for counting digits to the right of
+  // any decimal point.  If and when we encounter a decimal point,
+  // we'll set this to one, otherwise it'll stay zero.
+  int delta_r = 0;
+
+  __int128 result = 0;
+
+  // We need to check the last two characters.  If CR or DB, then the result
+  // is negative:
+  if( length >= 2)
     {
-    // Up to 10**18 we do directly:
-    retval = pos[n];
+    if(         ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_D)
+              || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_d))
+            &&  ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_B)
+              || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_b)) )
+      {
+      hyphen = 1;
+      }
+    else if(    ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_C)
+              || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_c))
+            &&  ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_R)
+              || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_r)) )
+      {
+      hyphen = 1;
+      }
     }
-  else
+
+  while( index < length )
     {
-    // 19 through 38:
-    retval = pos[n/2];
-    retval *= retval;
-    if( n & 1 )
+    unsigned char ch = ps[index++] & 0xFF;
+    if( ch == charmap->mapped_character(__gg__decimal_point) )
       {
-      retval *= 10;
+      delta_r = 1;
+      continue;
+      }
+    if( ch == charmap->mapped_character(ascii_minus)  )
+      {
+      hyphen = 1;
+      continue;
+      }
+
+    if(  charmap->mapped_character(ascii_0) <= ch
+      && ch <= charmap->mapped_character(ascii_9) )
+      {
+      result *= 10;
+      // In both EBCDIC and ASCII, this works:
+      result += ch & 0x0F ;
+      *rdigits += delta_r ;
+      continue;
       }
     }
-  return retval;
+
+  if( hyphen )
+    {
+    result = -result;
+    }
+  return result;
   }
 
-extern "C"
+static
 __int128
-__gg__scale_by_power_of_ten_1(__int128 value, int N)
+big_endian_to_binary_signed(
+  const unsigned char *psource,
+  int   capacity
+)
   {
-  // This routine is called when the result of the scaling is not allowed to
-  // have non-zero rdigits.  __gg__rdigits is set to 1 when the result is
-  // in the bad zone.  The ultimate caller needs to examine __gg__rdigits to
-  // decide what to do about it.
+  // This subroutine takes a big-endian value of "capacity" bytes and
+  // converts it to a signed INT128.  The highest order bit of the big-endian
+  // value determines whether or not the highest-order bits of the INT128
+  // return value are off or on.
 
-  // This is a separate routine because of the performance hit caused by the
-  // value % pot operation, which is needed only when certain EC checking is
-  // turned on.
-  if( N > 0 )
+  __int128 retval;
+  if( *psource >= 128 )
     {
-    __gg__rdigits = 0;
-    value *= __gg__power_of_ten(N);
+    retval = -1;
     }
-  else if( N < 0)
+  else
     {
-    // We throwing away the N rightmost digits.  Use __gg__rdigits
-    // to let the calling chain know they were non-zero:
-    __int128 pot = __gg__power_of_ten(-N);
-    if( value % pot)
-      {
-      __gg__rdigits = 1;
-      }
-    else
-      {
-      __gg__rdigits = 0;
-      }
-
-    value /= pot;
+    retval = 0;
     }
-  else
+
+  // move the bytes of psource into retval, flipping them end-to-end
+  unsigned char *dest = PTRCAST(unsigned char, &retval);
+  while(capacity > 0)
     {
-    // N is zero
-    __gg__rdigits = 0;
+    *dest++ = psource[--capacity];
     }
-  return value;
+  return retval;
   }
 
-extern "C"
+static
 __int128
-__gg__scale_by_power_of_ten_2(__int128 value, int N)
+little_endian_to_binary_signed(
+  const unsigned char *psource,
+  int capacity
+)
   {
-  if( N > 0 )
+  // This subroutine takes a little-endian value of "capacity" bytes and
+  // converts it to a signed INT128.  The highest order bit of the little-endian
+  // value determines whether or not the highest-order bits of the INT128
+  // return value are off or on.
+
+  __int128 result;
+
+  // Set all the bits of the result based on the sign of the source:
+  if( psource[capacity-1] >= 128 )
     {
-    value *= __gg__power_of_ten(N);
+    result = -1;
     }
-  else if( N < 0)
+  else
     {
-    value /= __gg__power_of_ten(-N);
+    result = 0;
     }
-  return value;
+
+  // Copy the low-order bytes into place:
+  memcpy(&result, psource, capacity);
+  return result;
   }
 
-static bool
-value_is_too_big(const cblc_field_t *var,
-                 __int128            value,
-                 int                 source_rdigits)
+static
+__int128
+little_endian_to_binary_unsigned(
+  const unsigned char *psource,
+  int capacity
+)
   {
-  // This routine is in support of arithmetic ON SIZE ERROR.  It returns
-  // TRUE if var hasn't enough bytes to hold the decimal representation
-  // of value:
-  bool retval = false;
-
-  if( !(var->attr & intermediate_e) )
-    {
-    if( value < 0 )
-      {
-      value = -value;
-      }
-    if( var->digits )
-      {
-      // I don't know how to describe this calculation.  I came up with the
-      // equation by working a few examples.  For instance, if value is 12345 and
-      // source_rdigits is two, then we are trying to cram 123.45 into 99v99999
-      // and we have a size error.  So, digits is 7, rdigits is 5 and source_rdigits
-      // 2.  That means we compare 12345 with 10^(7 - 5 + 2), which is 12345 versus
-      // 10000, which is too big, which means we have a size error.
-      retval =
-          value >= __gg__power_of_ten( var->digits - var->rdigits + source_rdigits);
-      }
-    else
-      {
-      // var->digits is zero.  We are dealing with a binary-style number that
-      // fills the whole of the value
-      if( !(  var->type == FldNumericBin5
-              || var->type == FldPointer
-              || var->type == FldIndex) )
-        {
-        __gg__abort("value_is_too_big() was given a type it doesn't know about");
-        }
-      if( var->capacity < 16 )
-        {
-        __int128 max_possible = 1;
-        max_possible = max_possible << (var->capacity * 8);
-        retval = value >= max_possible;
-        }
-      }
-    }
+  __int128 result = 0;
 
-  return retval;
+  // Copy the low-order bytes into place:
+  memcpy(&result, psource, capacity);
+  return result;
   }
 
-static void
-binary_to_big_endian(   unsigned char *dest,
-                        int            bytes,
-                        __int128       value
-                    )
+static
+__int128
+big_endian_to_binary_unsigned(
+  const unsigned char *psource,
+  int   capacity
+)
   {
-  if( value < 0 )
-    {
-    memset(dest, 0xFF, bytes);
-    }
-  else
-    {
-    memset(dest, 0x00, bytes);
-    }
+  // This subroutine takes an unsigned big-endian value of "capacity" bytes and
+  // converts it to an INT128.
 
-  dest += bytes-1;
-  while( bytes-- )
-    {
-    *dest-- = (unsigned char) value;
-    value >>= 8;
-    }
-  }
+  __int128 retval = 0 ;
 
-static void
-binary_to_little_endian(   unsigned char *dest,
-                           int            bytes,
-                           __int128       value
-                       )
-  {
-  if( value < 0 )
-    {
-    memset(dest, 0xFF, bytes);
-    }
-  else
+  // move the bytes of psource into retval, flipping them end-to-end
+  unsigned char *dest = PTRCAST(unsigned char, &retval);
+  while(capacity > 0)
     {
-    memset(dest, 0x00, bytes);
+    *dest++ = psource[--capacity];
     }
-  memcpy(dest, &value, bytes);
+  return retval;
   }
 
-static __int128
-int128_to_int128_rounded( cbl_round_t rounded,
-                          __int128    value,
-                          __int128    factor,
-                          __int128    remainder,
-                          int        *compute_error)
+static
+__int128
+get_binary_value_local(  int                 *rdigits,
+                         const cblc_field_t  *resolved_var,
+                         unsigned char       *resolved_location,
+                         size_t               resolved_length)
   {
-  // value is signed, and is scaled to the target
-  GCOB_FP128 fpart = ((GCOB_FP128)remainder) / ((GCOB_FP128)factor);
-  __int128 retval = value;
+  __int128 retval = 0;
 
-  if(rounded == nearest_even_e
-     && fpart != GCOB_FP128_LITERAL (-0.5)
-     && fpart != GCOB_FP128_LITERAL (0.5))
+  switch( resolved_var->type )
     {
-    // "bankers rounding" has been requested.
-    //
-    // Since the fraction is not 0.5, this is an ordinary rounding
-    // problem
-    rounded =  nearest_away_from_zero_e;
-    }
+    case FldLiteralA :
+      fprintf(stderr, "%s(): is trying to handle a FldLiteralA\n", __func__);
+      abort();
+      break;
 
-  switch(rounded)
-    {
-    case truncation_e:
+    case FldGroup :
+    case FldAlphanumeric :
+      // Read the data area as a dirty string:
+      retval = __gg__dirty_to_binary(
+                        PTRCAST(const char, resolved_location),
+                        resolved_var->encoding,
+                        resolved_length,
+                        rdigits );
       break;
 
-    case nearest_away_from_zero_e:
+    case FldNumericDisplay:
       {
-      // This is ordinary rounding, like you learned in grade school
-      // 0.0 through 0.4 becomes 0
-      // 0.5 through 0.9 becomes 1
-      if( value < 0 )
+      *rdigits = resolved_var->rdigits;
+      if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE )
         {
-        if( fpart <= GCOB_FP128_LITERAL(-0.5) )
-          {
-          retval -= 1;
-          }
+        // This is a degenerate case, which violates the language
+        // specification, but nonetheless seems to be a thing.  By
+        // default, HIGH-VALUE is usually assumed to be 0xFF.  This is
+        // not necessarily true; HIGH-VALUE can be changed by the
+        // SPECIAL-NAMES ALPHABET clause.  Furthermore, by definition,
+        // HIGH-VALUE applies *only* to text literals.  However, there
+        // seems to be code out in the universe that wants to be able
+        // to compare NumericDisplay values that have been set to
+        // HIGH-VALUE.  Consider, for example, code that reads from
+        // a disk file which sets the input field to HIGH-VALUE upon
+        // an end-of-file condition.
+
+        // This code detects that particular condition, and sets the
+        // resulting binary number to the maximum possible positive
+        // value.
+
+        // Turn all the bits on
+        memset( &retval, 0xFF, sizeof(retval) );
+
+        // Make it positive by turning off the highest order bit:
+        (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F;
         }
       else
         {
-        if( fpart >= GCOB_FP128_LITERAL(0.5) )
+        const charmap_t *charmap = __gg__get_charmap(resolved_var->encoding);
+        int stride = charmap->stride();
+        unsigned char *digits;
+        unsigned char *sign_byte_location;
+        int ndigits;
+        if( resolved_var->attr & signable_e )
           {
-          retval += 1;
+          // Pick up the sign byte, and force our value to be positive
+          if(   (resolved_var->attr  & separate_e )
+             && (resolved_var->attr  & leading_e  ) )
+            {
+            // LEADING SEPARATE
+            digits             = resolved_location+stride;
+            sign_byte_location = resolved_location;
+            ndigits = resolved_length - stride;
+            }
+          else if(    (resolved_var->attr & separate_e)
+                  && !(resolved_var->attr & leading_e ) )
+            {
+            // TRAILING SEPARATE
+            digits             = resolved_location;
+            sign_byte_location = resolved_location + resolved_length - stride;
+            ndigits = resolved_length - stride;
+            }
+          else if( (resolved_var->attr & leading_e) )
+            {
+            // LEADING
+            digits             = resolved_location;
+            sign_byte_location = resolved_location;
+            ndigits = resolved_length;
+            }
+          else // if( !(resolved_var->attr & leading_e) )
+            {
+            // TRAILING
+            digits             = resolved_location;
+            sign_byte_location = resolved_location + resolved_length - stride;
+            ndigits = resolved_length;
+            }
+          }
+        else
+          {
+          digits             = resolved_location;
+          sign_byte_location = resolved_location;
+          ndigits = resolved_length;
           }
+        ndigits /= stride;
+        retval = __gg__numeric_display_to_binary(sign_byte_location,
+                                                 digits,
+                                                 ndigits,
+                                                 resolved_var->encoding);
         }
       break;
       }
 
-    case away_from_zero_e:
-      {
-      // zero stays zero, otherwise head for the next number away from zero
-      if( value < 0 )
+    case FldNumericEdited :
+      retval = edited_to_binary(resolved_var,
+                                PTRCAST(char, resolved_location),
+                                resolved_length,
+                                rdigits);
+      break;
+
+    case FldNumericBinary :
+      if( resolved_var->attr & signable_e)
         {
-        if( fpart != 0 )
-          {
-          retval -= 1;
-          }
+        retval = big_endian_to_binary_signed(
+                        PTRCAST(const unsigned char, resolved_location),
+                        resolved_length);
         }
       else
         {
-        if( fpart != 0 )
-          {
-          retval += 1;
-          }
+        retval = big_endian_to_binary_unsigned(
+                        PTRCAST(const unsigned char, resolved_location),
+                        resolved_length);
         }
+      *rdigits = resolved_var->rdigits;
       break;
-      }
 
-    case nearest_toward_zero_e:
+    case FldLiteralN:
       {
-      // 0.0 through 0.5 becomes 0
-      // 0.6 through 0.9 becomes 1
-      if( value < 0 )
+      if( resolved_var->attr & signable_e)
         {
-        if( fpart < GCOB_FP128_LITERAL(-0.5) )
-          {
-          retval -= 1;
-          }
+        retval = little_endian_to_binary_signed(resolved_var->data,
+                                                resolved_var->capacity);
         }
       else
         {
-        if( fpart > GCOB_FP128_LITERAL(0.5) )
-          {
-          retval += 1;
-          }
+        retval = little_endian_to_binary_unsigned(resolved_var->data,
+                                                  resolved_var->capacity);
         }
+      *rdigits = resolved_var->rdigits;
       break;
       }
 
-    case toward_greater_e:
-      {
-      if( value > 0 )
+    case FldNumericBin5:
+    case FldIndex:
+    case FldPointer:
+      if( resolved_var->attr & signable_e)
         {
-        if( fpart != 0 )
-          {
-          retval += 1;
-          }
+        retval = little_endian_to_binary_signed(
+                      PTRCAST(const unsigned char, resolved_location),
+                      resolved_length);
         }
-      break;
-      }
-
-    case toward_lesser_e:
-      {
-      if( value < 0 )
+      else
         {
-        if(fpart != 0)
-          {
-          retval -= 1;
-          }
+        retval = little_endian_to_binary_unsigned(
+                      PTRCAST(const unsigned char, resolved_location),
+                      resolved_length);
         }
+      *rdigits = resolved_var->rdigits;
       break;
-      }
 
-    case nearest_even_e:
+    case FldPacked:
       {
-      // This is "banker's rounding"
-      // 3.4 -> 3.0
-      // 3.5 -> 4.0
-      // 3.6 -> 4.0
-
-      // 4.4 -> 4.0
-      // 4.5 -> 4.0
-      // 4.6 -> 5.0
+      *rdigits = resolved_var->rdigits;
+      retval = __gg__packed_to_binary(resolved_location,
+                                      resolved_length);
+      break;
+      }
+    }
 
-     // We know that the fractional part is 0.5 or -0.5, and we know that
-     // we want 3 to become 4 and for 4 to stay 4.
+  if( resolved_var->attr & scaled_e )
+    {
+    // Here's where we handle a P-scaled number.
 
-    if( value < 0 )
+    if( resolved_var->rdigits >= 0)
       {
-      if( retval & 1 )
-        {
-        retval -= 1;
-        }
+      // We might be dealing with a source with a PICTURE string of
+      // PPPPPP999, which means retval is a three-digit number
+      // and resolved_var->rdigits is +6.  That means we need to divide retval
+      // by 10**9, and we need to make rdigits 9
+      *rdigits = resolved_var->digits + resolved_var->rdigits;
       }
     else
       {
-      if( retval & 1 )
-        {
-        retval += 1;
-        }
-      }
-      break;
+      // We have a source with a PIC string like 999PPPPPP, which is
+      // a capacity of 3 and a resolved_var->rdigits of -6.  We need to multiply
+      // retval by +6, and make rdigits zero:
+      retval *= __gg__power_of_ten( -resolved_var->rdigits );
+      *rdigits = 0;
       }
+    }
 
-    case prohibited_e:
-      {
-      if( fpart != 0 )
-        {
-        *compute_error |= compute_error_truncate;
-        }
+  return retval;
+  }
 
-      break;
+static uint32_t
+get_init_value(cblc_field_t *field)
+  {
+  uint32_t retval = 0;
+  cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+  if( figconst )
+    {
+    switch(figconst)
+      {
+      case normal_value_e :
+        // This is not possible, it says here in the fine print.
+        abort();
+        break;
+      case low_value_e    :
+        retval = __gg__low_value_character;
+        break;
+      case zero_value_e   :
+        retval = ascii_zero;
+        break;
+      case space_value_e  :
+        retval = ascii_space;
+        break;
+      case quote_value_e  :
+        retval = __gg__quote_character;
+        break;
+      case high_value_e   :
+        retval = __gg__high_value_character;
+        break;
+      case null_value_e:
+        retval = 0x00;
+        break;
       }
-
-    default:
-      abort();
-      break;
+    }
+  else
+    {
+    int rdigits;
+    retval = get_binary_value_local(&rdigits,
+                                    field,
+                                    field->data,
+                                    field->capacity
+                                    );
     }
   return retval;
   }
 
-static __int128
-f128_to_i128_rounded( cbl_round_t rounded,
-                    GCOB_FP128   value,
-                    int        *compute_error)
+extern "C"
+void __gg__initialization_values( uint32_t wsclear,
+                                  cblc_field_t *working_init,
+                                  cblc_field_t *local_init)
   {
-  // value is signed, and is scaled to the target
-  GCOB_FP128 ipart;
-  GCOB_FP128 fpart = FP128_FUNC(modf)(value, &ipart);
-  __int128 retval = (__int128)ipart;
+  __gg__wsclear = wsclear;
+  __gg__working_init = NOT_A_CHARACTER;
+  __gg__local_init   = NOT_A_CHARACTER;
 
-  if(rounded == nearest_even_e
-     && fpart != GCOB_FP128_LITERAL (-0.5)
-     && fpart != GCOB_FP128_LITERAL (0.5))
+  if( working_init )
     {
-    // "bankers rounding" has been requested.
-    //
-    // Since the fraction is not 0.5, this is an ordinary rounding
-    // problem
-    rounded =  nearest_away_from_zero_e;
+    __gg__working_init = get_init_value(working_init);
+    }
+  if( local_init )
+    {
+    __gg__local_init = get_init_value(local_init);
     }
+  program_states.back().rt_working_init = __gg__working_init;
+  program_states.back().rt_local_init   = __gg__local_init;
+  }
 
-  switch(rounded)
+extern "C"
+void
+__gg__init_program_state(cbl_encoding_t display_encoding,
+                         cbl_encoding_t national_encoding)
+  {
+  // This routine gets called at DATA DIVISION time.
+
+  __gg__display_encoding  = display_encoding;
+  __gg__national_encoding = national_encoding;
+
+  // We need to make sure that the program_states vector has at least one
+  // entry in it.  This happens when we are the very first PROGRAM-ID called
+  // in this module.
+  if( program_states.empty() )
     {
-    case truncation_e:
-      break;
+    initialize_program_state();
+    }
+  }
 
-    case nearest_away_from_zero_e:
-      {
-      // This is ordinary rounding, like you learned in grade school
-      // 0.0 through 0.4 becomes 0
-      // 0.5 through 0.9 becomes 1
-      if( value < 0 )
-        {
-        if( fpart <= GCOB_FP128_LITERAL (-0.5) )
-          {
-          retval -= 1;
-          }
-        }
-      else
-        {
-        if( fpart >= GCOB_FP128_LITERAL (0.5) )
-          {
-          retval += 1;
-          }
-        }
-      break;
-      }
+static int
+var_is_refmod( const cblc_field_t *var )
+  {
+  return (var->attr & refmod_e) != 0;
+  }
 
-    case away_from_zero_e:
+extern "C"
+__int128
+__gg__power_of_ten(int n)
+  {
+  // 2** 64 = 1.8E19
+  // 2**128 = 3.4E38
+  __int128 retval = 1;
+  static const int MAX_POWER = 19 ;
+  static const __int128 pos[MAX_POWER+1] =
+    {
+    1ULL,                       // 00
+    10ULL,                      // 01
+    100ULL,                     // 02
+    1000ULL,                    // 03
+    10000ULL,                   // 04
+    100000ULL,                  // 05
+    1000000ULL,                 // 06
+    10000000ULL,                // 07
+    100000000ULL,               // 08
+    1000000000ULL,              // 09
+    10000000000ULL,             // 10
+    100000000000ULL,            // 11
+    1000000000000ULL,           // 12
+    10000000000000ULL,          // 13
+    100000000000000ULL,         // 14
+    1000000000000000ULL,        // 15
+    10000000000000000ULL,       // 16
+    100000000000000000ULL,      // 17
+    1000000000000000000ULL,     // 18
+    10000000000000000000ULL,    // 19
+    };
+  if( n < 0 || n>MAX_POWER*2)     // The most we can handle is 10**38
+    {
+    fprintf(stderr,
+            "Trying to raise 10 to %d as an int128, which we can't do.\n",
+            n);
+    fprintf(stderr, "The problem is in %s %s:%d.\n", __func__, __FILE__, __LINE__);
+    abort();
+    }
+  if( n <= MAX_POWER )
+    {
+    // Up to 10**18 we do directly:
+    retval = pos[n];
+    }
+  else
+    {
+    // 19 through 38:
+    retval = pos[n/2];
+    retval *= retval;
+    if( n & 1 )
       {
-      // zero stays zero, otherwise head for the next number away from zero
-      if( value < 0 )
-        {
-        if( fpart != 0 )
-          {
-          retval -= 1;
-          }
-        }
-      else
-        {
-        if( fpart != 0 )
-          {
-          retval += 1;
-          }
-        }
-      break;
+      retval *= 10;
       }
+    }
+  return retval;
+  }
 
-    case nearest_toward_zero_e:
-      {
-      // 0.0 through 0.5 becomes 0
-      // 0.6 through 0.9 becomes 1
-      if( value < 0 )
-        {
-        if( fpart < GCOB_FP128_LITERAL (-0.5) )
-          {
-          retval -= 1;
-          }
-        }
-      else
-        {
-        if( fpart > GCOB_FP128_LITERAL (0.5) )
-          {
-          retval += 1;
-          }
+extern "C"
+__int128
+__gg__scale_by_power_of_ten_1(__int128 value, int N)
+  {
+  // This routine is called when the result of the scaling is not allowed to
+  // have non-zero rdigits.  __gg__rdigits is set to 1 when the result is
+  // in the bad zone.  The ultimate caller needs to examine __gg__rdigits to
+  // decide what to do about it.
+
+  // This is a separate routine because of the performance hit caused by the
+  // value % pot operation, which is needed only when certain EC checking is
+  // turned on.
+  if( N > 0 )
+    {
+    __gg__rdigits = 0;
+    value *= __gg__power_of_ten(N);
+    }
+  else if( N < 0)
+    {
+    // We throwing away the N rightmost digits.  Use __gg__rdigits
+    // to let the calling chain know they were non-zero:
+    __int128 pot = __gg__power_of_ten(-N);
+    if( value % pot)
+      {
+      __gg__rdigits = 1;
+      }
+    else
+      {
+      __gg__rdigits = 0;
+      }
+
+    value /= pot;
+    }
+  else
+    {
+    // N is zero
+    __gg__rdigits = 0;
+    }
+  return value;
+  }
+
+extern "C"
+__int128
+__gg__scale_by_power_of_ten_2(__int128 value, int N)
+  {
+  if( N > 0 )
+    {
+    value *= __gg__power_of_ten(N);
+    }
+  else if( N < 0)
+    {
+    value /= __gg__power_of_ten(-N);
+    }
+  return value;
+  }
+
+static bool
+value_is_too_big(const cblc_field_t *var,
+                 __int128            value,
+                 int                 source_rdigits)
+  {
+  // This routine is in support of arithmetic ON SIZE ERROR.  It returns
+  // TRUE if var hasn't enough bytes to hold the decimal representation
+  // of value:
+  bool retval = false;
+
+  if( !(var->attr & intermediate_e) )
+    {
+    if( value < 0 )
+      {
+      value = -value;
+      }
+    if( var->digits )
+      {
+      // I don't know how to describe this calculation.  I came up with the
+      // equation by working a few examples.  For instance, if value is 12345 and
+      // source_rdigits is two, then we are trying to cram 123.45 into 99v99999
+      // and we have a size error.  So, digits is 7, rdigits is 5 and source_rdigits
+      // 2.  That means we compare 12345 with 10^(7 - 5 + 2), which is 12345 versus
+      // 10000, which is too big, which means we have a size error.
+      retval =
+          value >= __gg__power_of_ten( var->digits - var->rdigits + source_rdigits);
+      }
+    else
+      {
+      // var->digits is zero.  We are dealing with a binary-style number that
+      // fills the whole of the value
+      if( !(  var->type == FldNumericBin5
+              || var->type == FldPointer
+              || var->type == FldIndex) )
+        {
+        __gg__abort("value_is_too_big() was given a type it doesn't know about");
+        }
+      if( var->capacity < 16 )
+        {
+        __int128 max_possible = 1;
+        max_possible = max_possible << (var->capacity * 8);
+        retval = value >= max_possible;
+        }
+      }
+    }
+
+  return retval;
+  }
+
+static void
+binary_to_big_endian(   unsigned char *dest,
+                        int            bytes,
+                        __int128       value
+                    )
+  {
+  if( value < 0 )
+    {
+    memset(dest, 0xFF, bytes);
+    }
+  else
+    {
+    memset(dest, 0x00, bytes);
+    }
+
+  dest += bytes-1;
+  while( bytes-- )
+    {
+    *dest-- = (unsigned char) value;
+    value >>= 8;
+    }
+  }
+
+static void
+binary_to_little_endian(   unsigned char *dest,
+                           int            bytes,
+                           __int128       value
+                       )
+  {
+  if( value < 0 )
+    {
+    memset(dest, 0xFF, bytes);
+    }
+  else
+    {
+    memset(dest, 0x00, bytes);
+    }
+  memcpy(dest, &value, bytes);
+  }
+
+static __int128
+int128_to_int128_rounded( cbl_round_t rounded,
+                          __int128    value,
+                          __int128    factor,
+                          __int128    remainder,
+                          int        *compute_error)
+  {
+  // value is signed, and is scaled to the target
+  GCOB_FP128 fpart = ((GCOB_FP128)remainder) / ((GCOB_FP128)factor);
+  __int128 retval = value;
+
+  if(rounded == nearest_even_e
+     && fpart != GCOB_FP128_LITERAL (-0.5)
+     && fpart != GCOB_FP128_LITERAL (0.5))
+    {
+    // "bankers rounding" has been requested.
+    //
+    // Since the fraction is not 0.5, this is an ordinary rounding
+    // problem
+    rounded =  nearest_away_from_zero_e;
+    }
+
+  switch(rounded)
+    {
+    case truncation_e:
+      break;
+
+    case nearest_away_from_zero_e:
+      {
+      // This is ordinary rounding, like you learned in grade school
+      // 0.0 through 0.4 becomes 0
+      // 0.5 through 0.9 becomes 1
+      if( value < 0 )
+        {
+        if( fpart <= GCOB_FP128_LITERAL(-0.5) )
+          {
+          retval -= 1;
+          }
+        }
+      else
+        {
+        if( fpart >= GCOB_FP128_LITERAL(0.5) )
+          {
+          retval += 1;
+          }
+        }
+      break;
+      }
+
+    case away_from_zero_e:
+      {
+      // zero stays zero, otherwise head for the next number away from zero
+      if( value < 0 )
+        {
+        if( fpart != 0 )
+          {
+          retval -= 1;
+          }
+        }
+      else
+        {
+        if( fpart != 0 )
+          {
+          retval += 1;
+          }
+        }
+      break;
+      }
+
+    case nearest_toward_zero_e:
+      {
+      // 0.0 through 0.5 becomes 0
+      // 0.6 through 0.9 becomes 1
+      if( value < 0 )
+        {
+        if( fpart < GCOB_FP128_LITERAL(-0.5) )
+          {
+          retval -= 1;
+          }
+        }
+      else
+        {
+        if( fpart > GCOB_FP128_LITERAL(0.5) )
+          {
+          retval += 1;
+          }
         }
       break;
       }
@@ -1263,22 +1549,183 @@ f128_to_i128_rounded( cbl_round_t rounded,
   return retval;
   }
 
-static void
-int128_to_field(cblc_field_t   *var,
-                unsigned char  *location,
-                size_t          length,
-                __int128        value,
-                int             source_rdigits,
-                enum cbl_round_t  rounded,
-                int            *compute_error)
+static __int128
+f128_to_i128_rounded( cbl_round_t rounded,
+                    GCOB_FP128   value,
+                    int        *compute_error)
   {
-  // This routine takes a numerical value, and scales and converts it to the
-  // target field type.
-
-  // It operates in the source codeset space, and converts the final result
-  // to the native codeset space
+  // value is signed, and is scaled to the target
+  GCOB_FP128 ipart;
+  GCOB_FP128 fpart = FP128_FUNC(modf)(value, &ipart);
+  __int128 retval = (__int128)ipart;
 
-  switch( var->type )
+  if(rounded == nearest_even_e
+     && fpart != GCOB_FP128_LITERAL (-0.5)
+     && fpart != GCOB_FP128_LITERAL (0.5))
+    {
+    // "bankers rounding" has been requested.
+    //
+    // Since the fraction is not 0.5, this is an ordinary rounding
+    // problem
+    rounded =  nearest_away_from_zero_e;
+    }
+
+  switch(rounded)
+    {
+    case truncation_e:
+      break;
+
+    case nearest_away_from_zero_e:
+      {
+      // This is ordinary rounding, like you learned in grade school
+      // 0.0 through 0.4 becomes 0
+      // 0.5 through 0.9 becomes 1
+      if( value < 0 )
+        {
+        if( fpart <= GCOB_FP128_LITERAL (-0.5) )
+          {
+          retval -= 1;
+          }
+        }
+      else
+        {
+        if( fpart >= GCOB_FP128_LITERAL (0.5) )
+          {
+          retval += 1;
+          }
+        }
+      break;
+      }
+
+    case away_from_zero_e:
+      {
+      // zero stays zero, otherwise head for the next number away from zero
+      if( value < 0 )
+        {
+        if( fpart != 0 )
+          {
+          retval -= 1;
+          }
+        }
+      else
+        {
+        if( fpart != 0 )
+          {
+          retval += 1;
+          }
+        }
+      break;
+      }
+
+    case nearest_toward_zero_e:
+      {
+      // 0.0 through 0.5 becomes 0
+      // 0.6 through 0.9 becomes 1
+      if( value < 0 )
+        {
+        if( fpart < GCOB_FP128_LITERAL (-0.5) )
+          {
+          retval -= 1;
+          }
+        }
+      else
+        {
+        if( fpart > GCOB_FP128_LITERAL (0.5) )
+          {
+          retval += 1;
+          }
+        }
+      break;
+      }
+
+    case toward_greater_e:
+      {
+      if( value > 0 )
+        {
+        if( fpart != 0 )
+          {
+          retval += 1;
+          }
+        }
+      break;
+      }
+
+    case toward_lesser_e:
+      {
+      if( value < 0 )
+        {
+        if(fpart != 0)
+          {
+          retval -= 1;
+          }
+        }
+      break;
+      }
+
+    case nearest_even_e:
+      {
+      // This is "banker's rounding"
+      // 3.4 -> 3.0
+      // 3.5 -> 4.0
+      // 3.6 -> 4.0
+
+      // 4.4 -> 4.0
+      // 4.5 -> 4.0
+      // 4.6 -> 5.0
+
+     // We know that the fractional part is 0.5 or -0.5, and we know that
+     // we want 3 to become 4 and for 4 to stay 4.
+
+    if( value < 0 )
+      {
+      if( retval & 1 )
+        {
+        retval -= 1;
+        }
+      }
+    else
+      {
+      if( retval & 1 )
+        {
+        retval += 1;
+        }
+      }
+      break;
+      }
+
+    case prohibited_e:
+      {
+      if( fpart != 0 )
+        {
+        *compute_error |= compute_error_truncate;
+        }
+
+      break;
+      }
+
+    default:
+      abort();
+      break;
+    }
+  return retval;
+  }
+
+static void
+int128_to_field(cblc_field_t   *var,
+                unsigned char  *location,
+                size_t          length,
+                __int128        value,
+                int             source_rdigits,
+                enum cbl_round_t  rounded,
+                int            *compute_error)
+  {
+  // This routine takes a numerical value, and scales and converts it to the
+  // target field type.
+
+  // It operates in the source codeset space, and converts the final result
+  // to the native codeset space
+
+  switch( var->type )
     {
     case FldFloat:
       {
@@ -1473,54 +1920,61 @@ int128_to_field(cblc_field_t   *var,
           {
           case FldGroup:
           case FldAlphanumeric:
+            {
             // This is sort of a Hail Mary play.  We aren't supposed to do this
             // conversion if rdigits is non-zero.  But we shouldn't have gotten
-            // here if rdigits is non-zero.  So, we'll just go with the flow.
+            // here if rdigits is non-zero.  But we're here, so we'll do the
+            // best we can in case somebody came up with a dialect that allows
+            // the attempt.
 
             // Note that sending a signed value to an alphanumeric strips off
             // any plus or minus signs.
             memset(location, 0, length);
+            const charmap_t *charmap = __gg__get_charmap(var->encoding);
             size_error = __gg__binary_to_string_encoded(
                                            PTRCAST(char, location),
                                            length > MAX_FIXED_POINT_DIGITS
                                                     ? MAX_FIXED_POINT_DIGITS
-                                                    : length,
+                                                    : length/charmap->stride(),
                                            value,
                                            var->encoding);
             break;
+            }
 
           case FldNumericDisplay:
             if( var->attr & signable_e )
               {
               charmap_t *charmap = __gg__get_charmap(var->encoding);
+              int stride = charmap->stride();
 
               // Things get exciting when a numeric-display value is signable
 
               if( var->attr & separate_e )
                 {
                 // Whether positive or negative, a sign there will be:
-                char sign_ch = is_negative ?
+                cbl_char_t sign_ch = is_negative ?
                                 charmap->mapped_character(ascii_minus)
                               : charmap->mapped_character(ascii_plus)  ;
                 if( var->attr & leading_e )
                   {
                   // The sign character goes into the first location
                   size_error =
-                    __gg__binary_to_string_encoded(PTRCAST(char, location+1),
-                                                    length-1,
-                                                    value,
-                                                    var->encoding);
-                  location[0] = sign_ch;
+                     __gg__binary_to_string_encoded(
+                                                PTRCAST(char, location+stride),
+                                                var->digits,
+                                                value,
+                                                var->encoding);
+                  charmap->putch(sign_ch, location, (size_t)0);
                   }
                 else
                   {
                   // The sign character goes into the last location
                   size_error =
                     __gg__binary_to_string_encoded(PTRCAST(char, location),
-                                                    length-1,
+                                                    var->digits,
                                                     value,
                                                     var->encoding);
-                  location[length-1] = sign_ch;
+                  charmap->putch(sign_ch, location, length-stride);
                   }
                 }
               else
@@ -1534,7 +1988,7 @@ int128_to_field(cblc_field_t   *var,
                 // First, convert the binary value to the correct-length string
                 size_error =
                   __gg__binary_to_string_encoded(PTRCAST(char, location),
-                                                  length,
+                                                  var->digits,
                                                   value,
                                                   var->encoding);
 
@@ -1546,9 +2000,11 @@ int128_to_field(cblc_field_t   *var,
                   // If all of the digits are zero, then the result is zero, and
                   // we have to kill the is_negative flag:
                   is_negative = false;
-                  for(size_t i=0; i<length; i++)
+                  size_t index = 0;
+                  while(index<length)
                     {
-                    if( location[i] != charmap->mapped_character(ascii_0) )
+                    if( charmap->getch(location, &index)
+                                        != charmap->mapped_character(ascii_0) )
                       {
                       is_negative = true;
                       break;
@@ -1557,10 +2013,13 @@ int128_to_field(cblc_field_t   *var,
                   }
 
                 unsigned char *sign_location =
-                  var->attr & leading_e ? location : location + length - 1;
-
-                *sign_location = charmap->set_digit_negative(*sign_location,
-                                                             is_negative);
+                  var->attr & leading_e ? location
+                                        : location + length - stride;
+                cbl_char_t sign_digit = charmap->getch(sign_location,
+                                                       (size_t)0);
+                sign_digit = charmap->set_digit_negative(sign_digit,
+                                                         is_negative);
+                charmap->putch(sign_digit, sign_location, (size_t)0);
                 }
               }
             else
@@ -1568,7 +2027,7 @@ int128_to_field(cblc_field_t   *var,
               // It's a simple positive number
               size_error = __gg__binary_to_string_encoded(
                                                     PTRCAST(char, location),
-                                                    length,
+                                                    var->digits,
                                                     value,
                                                     var->encoding);
               }
@@ -1580,7 +2039,7 @@ int128_to_field(cblc_field_t   *var,
             charmap_t *charmap = __gg__get_charmap(var->encoding);
             if( value == 0 && (var->attr & blank_zero_e) )
               {
-              memset(location, charmap->mapped_character(ascii_space), length);
+              charmap->memset(location, charmap->mapped_character(ascii_space), length);
               }
             else
               {
@@ -1605,7 +2064,7 @@ int128_to_field(cblc_field_t   *var,
                                      DEFAULT_SOURCE_ENCODING,
                                      var->encoding,
                                      PTRCAST(char, location),
-                                     var->capacity,
+                                     var->capacity/charmap->stride(),
                                      &outlength);
               memcpy(location, converted, outlength);
               }
@@ -1701,416 +2160,38 @@ int128_to_field(cblc_field_t   *var,
                 result might have a bad high-place digit for a value with an
                 odd number of places. */
 
-            __int128 mask = __gg__power_of_ten(digits);
-            size_error = !!(value / mask);
-            value %= mask;
-
-            // We are now set up to do the conversion:
-            __gg__binary_to_packed(location, digits, value);
-
-            // We can put the sign nybble into place at this point.  Note that
-            // for COMP-6 numbers the sign_nybble value is zero, so the next
-            // operation is harmless.
-            location[length -1] |= sign_nybble;
-
-            // And we're done.
-            break;
-            }
-
-          default:
-            fprintf(stderr, "can't convert in %s() %s %d\n",
-                    __func__,
-                    var->name,
-                    var->type);
-            abort();
-            break;
-          }
-        if( compute_error )
-          {
-          *compute_error |= size_error ? compute_error_truncate : 0;
-          }
-        }
-      }
-      break;
-    }
-  }
-
-static __int128
-edited_to_binary( const cblc_field_t *field,
-                  char *ps_,
-                  int length,
-                  int *rdigits)
-  {
-  charmap_t *charmap = __gg__get_charmap(field->encoding);
-
-  const unsigned char *ps = const_cast<const unsigned char *>(PTRCAST(unsigned char, ps_));
-  // This routine is used for converting NumericEdited strings to
-  // binary.
-
-  // Numeric edited strings can have all kinds of crap in them: spaces,
-  // slashes, dollar signs...you name it.  It might have a minus sign at
-  // the beginning or end, or it might have CR or DB at the end.
-
-  // We are going to look for a minus sign, D (or d) and use that to flag the
-  // result as negative.  We are going to look for a decimal point and count up
-  // the numerical digits to the right of it.  And we are going to pretend
-  // that nothing else matters.
-
-  int hyphen = 0;
-  *rdigits = 0;
-
-  // index into the ps string
-  int index = 0;
-
-  // Create a delta_r for counting digits to the right of
-  // any decimal point.  If and when we encounter a decimal point,
-  // we'll set this to one, otherwise it'll stay zero.
-  int delta_r = 0;
-
-  __int128 result = 0;
-
-  // We need to check the last two characters.  If CR or DB, then the result
-  // is negative:
-  if( length >= 2)
-    {
-    if(         ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_D)
-              || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_d))
-            &&  ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_B)
-              || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_b)) )
-      {
-      hyphen = 1;
-      }
-    else if(    ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_C)
-              || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_c))
-            &&  ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_R)
-              || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_r)) )
-      {
-      hyphen = 1;
-      }
-    }
-
-  while( index < length )
-    {
-    unsigned char ch = ps[index++] & 0xFF;
-    if( ch == charmap->mapped_character(__gg__decimal_point) )
-      {
-      delta_r = 1;
-      continue;
-      }
-    if( ch == charmap->mapped_character(ascii_minus)  )
-      {
-      hyphen = 1;
-      continue;
-      }
-
-    if(  charmap->mapped_character(ascii_0) <= ch
-      && ch <= charmap->mapped_character(ascii_9) )
-      {
-      result *= 10;
-      // In both EBCDIC and ASCII, this works:
-      result += ch & 0x0F ;
-      *rdigits += delta_r ;
-      continue;
-      }
-    }
-
-  if( hyphen )
-    {
-    result = -result;
-    }
-  return result;
-  }
-
-static
-__int128
-big_endian_to_binary_signed(
-  const unsigned char *psource,
-  int   capacity
-)
-  {
-  // This subroutine takes a big-endian value of "capacity" bytes and
-  // converts it to a signed INT128.  The highest order bit of the big-endian
-  // value determines whether or not the highest-order bits of the INT128
-  // return value are off or on.
-
-  __int128 retval;
-  if( *psource >= 128 )
-    {
-    retval = -1;
-    }
-  else
-    {
-    retval = 0;
-    }
-
-  // move the bytes of psource into retval, flipping them end-to-end
-  unsigned char *dest = PTRCAST(unsigned char, &retval);
-  while(capacity > 0)
-    {
-    *dest++ = psource[--capacity];
-    }
-  return retval;
-  }
-
-static
-__int128
-little_endian_to_binary_signed(
-  const unsigned char *psource,
-  int capacity
-)
-  {
-  // This subroutine takes a little-endian value of "capacity" bytes and
-  // converts it to a signed INT128.  The highest order bit of the little-endian
-  // value determines whether or not the highest-order bits of the INT128
-  // return value are off or on.
-
-  __int128 result;
-
-  // Set all the bits of the result based on the sign of the source:
-  if( psource[capacity-1] >= 128 )
-    {
-    result = -1;
-    }
-  else
-    {
-    result = 0;
-    }
-
-  // Copy the low-order bytes into place:
-  memcpy(&result, psource, capacity);
-  return result;
-  }
-
-static
-__int128
-little_endian_to_binary_unsigned(
-  const unsigned char *psource,
-  int capacity
-)
-  {
-  __int128 result = 0;
-
-  // Copy the low-order bytes into place:
-  memcpy(&result, psource, capacity);
-  return result;
-  }
-
-static
-__int128
-big_endian_to_binary_unsigned(
-  const unsigned char *psource,
-  int   capacity
-)
-  {
-  // This subroutine takes an unsigned big-endian value of "capacity" bytes and
-  // converts it to an INT128.
-
-  __int128 retval = 0 ;
-
-  // move the bytes of psource into retval, flipping them end-to-end
-  unsigned char *dest = PTRCAST(unsigned char, &retval);
-  while(capacity > 0)
-    {
-    *dest++ = psource[--capacity];
-    }
-  return retval;
-  }
-
-static
-__int128
-get_binary_value_local(  int                 *rdigits,
-                         const cblc_field_t  *resolved_var,
-                         unsigned char       *resolved_location,
-                         size_t               resolved_length)
-  {
-  __int128 retval = 0;
-
-  switch( resolved_var->type )
-    {
-    case FldLiteralA :
-      fprintf(stderr, "%s(): is trying to handle a FldLiteralA\n", __func__);
-      abort();
-      break;
-
-    case FldGroup :
-    case FldAlphanumeric :
-      // Read the data area as a dirty string:
-      retval = __gg__dirty_to_binary(
-                        PTRCAST(const char, resolved_location),
-                        resolved_var->encoding,
-                        resolved_length,
-                        rdigits );
-      break;
-
-    case FldNumericDisplay:
-      {
-      *rdigits = resolved_var->rdigits;
-      if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE )
-        {
-        // This is a degenerate case, which violates the language
-        // specification, but nonetheless seems to be a thing.  By
-        // default, HIGH-VALUE is usually assumed to be 0xFF.  This is
-        // not necessarily true; HIGH-VALUE can be changed by the
-        // SPECIAL-NAMES ALPHABET clause.  Furthermore, by definition,
-        // HIGH-VALUE applies *only* to text literals.  However, there
-        // seems to be code out in the universe that wants to be able
-        // to compare NumericDisplay values that have been set to
-        // HIGH-VALUE.  Consider, for example, code that reads from
-        // a disk file which sets the input field to HIGH-VALUE upon
-        // an end-of-file condition.
-
-        // This code detects that particular condition, and sets the
-        // resulting binary number to the maximum possible positive
-        // value.
-
-        // Turn all the bits on
-        memset( &retval, 0xFF, sizeof(retval) );
-
-        // Make it positive by turning off the highest order bit:
-        (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F;
-        }
-      else
-        {
-        unsigned char *digits;
-        unsigned char *sign_byte_location;
-        int ndigits;
-        if( resolved_var->attr & signable_e )
-          {
-          // Pick up the sign byte, and force our value to be positive
-          if(   (resolved_var->attr  & separate_e )
-             && (resolved_var->attr  & leading_e  ) )
-            {
-            // LEADING SEPARATE
-            digits             = resolved_location+1;
-            sign_byte_location = resolved_location;
-            ndigits = resolved_length - 1;
-            }
-          else if(    (resolved_var->attr & separate_e)
-                  && !(resolved_var->attr & leading_e ) )
-            {
-            // TRAILING SEPARATE
-            digits             = resolved_location;
-            sign_byte_location = resolved_location + resolved_length - 1;
-            ndigits = resolved_length - 1;
-            }
-          else if( (resolved_var->attr & leading_e) )
-            {
-            // LEADING
-            digits             = resolved_location;
-            sign_byte_location = resolved_location;
-            ndigits = resolved_length;
-            }
-          else // if( !(resolved_var->attr & leading_e) )
-            {
-            // TRAILING
-            digits             = resolved_location;
-            sign_byte_location = resolved_location + resolved_length - 1;
-            ndigits = resolved_length;
-            }
-          }
-        else
-          {
-          digits             = resolved_location;
-          sign_byte_location = resolved_location;
-          ndigits = resolved_length;
-          }
-        retval = __gg__numeric_display_to_binary(sign_byte_location,
-                                                 digits,
-                                                 ndigits,
-                                                 resolved_var->encoding);
-        }
-      break;
-      }
-
-    case FldNumericEdited :
-      retval = edited_to_binary(resolved_var,
-                                PTRCAST(char, resolved_location),
-                                resolved_length,
-                                rdigits);
-      break;
-
-    case FldNumericBinary :
-      if( resolved_var->attr & signable_e)
-        {
-        retval = big_endian_to_binary_signed(
-                        PTRCAST(const unsigned char, resolved_location),
-                        resolved_length);
-        }
-      else
-        {
-        retval = big_endian_to_binary_unsigned(
-                        PTRCAST(const unsigned char, resolved_location),
-                        resolved_length);
-        }
-      *rdigits = resolved_var->rdigits;
-      break;
-
-    case FldLiteralN:
-      {
-      if( resolved_var->attr & signable_e)
-        {
-        retval = little_endian_to_binary_signed(resolved_var->data,
-                                                resolved_var->capacity);
-        }
-      else
-        {
-        retval = little_endian_to_binary_unsigned(resolved_var->data,
-                                                  resolved_var->capacity);
-        }
-      *rdigits = resolved_var->rdigits;
-      break;
-      }
-
-    case FldNumericBin5:
-    case FldIndex:
-    case FldPointer:
-      if( resolved_var->attr & signable_e)
-        {
-        retval = little_endian_to_binary_signed(
-                      PTRCAST(const unsigned char, resolved_location),
-                      resolved_length);
-        }
-      else
-        {
-        retval = little_endian_to_binary_unsigned(
-                      PTRCAST(const unsigned char, resolved_location),
-                      resolved_length);
-        }
-      *rdigits = resolved_var->rdigits;
-      break;
-
-    case FldPacked:
-      {
-      *rdigits = resolved_var->rdigits;
-      retval = __gg__packed_to_binary(resolved_location,
-                                      resolved_length);
-      break;
-      }
-    }
+            __int128 mask = __gg__power_of_ten(digits);
+            size_error = !!(value / mask);
+            value %= mask;
 
-  if( resolved_var->attr & scaled_e )
-    {
-    // Here's where we handle a P-scaled number.
+            // We are now set up to do the conversion:
+            __gg__binary_to_packed(location, digits, value);
 
-    if( resolved_var->rdigits >= 0)
-      {
-      // We might be dealing with a source with a PICTURE string of
-      // PPPPPP999, which means retval is a three-digit number
-      // and resolved_var->rdigits is +6.  That means we need to divide retval
-      // by 10**9, and we need to make rdigits 9
-      *rdigits = resolved_var->digits + resolved_var->rdigits;
-      }
-    else
-      {
-      // We have a source with a PIC string like 999PPPPPP, which is
-      // a capacity of 3 and a resolved_var->rdigits of -6.  We need to multiply
-      // retval by +6, and make rdigits zero:
-      retval *= __gg__power_of_ten( -resolved_var->rdigits );
-      *rdigits = 0;
+            // We can put the sign nybble into place at this point.  Note that
+            // for COMP-6 numbers the sign_nybble value is zero, so the next
+            // operation is harmless.
+            location[length -1] |= sign_nybble;
+
+            // And we're done.
+            break;
+            }
+
+          default:
+            fprintf(stderr, "can't convert in %s() %s %d\n",
+                    __func__,
+                    var->name,
+                    var->type);
+            abort();
+            break;
+          }
+        if( compute_error )
+          {
+          *compute_error |= size_error ? compute_error_truncate : 0;
+          }
+        }
       }
+      break;
     }
-
-  return retval;
   }
 
 #pragma GCC diagnostic ignored "-Wformat-overflow"
@@ -2124,8 +2205,47 @@ cobol_time()
   }
 
 extern "C"
-char *
-__gg__get_date_yymmdd(const cblc_field_t *field)
+void
+__gg__field_from_string(cblc_field_t *field,
+                        size_t field_o,
+                        size_t field_s,
+                  const char *string,
+                        size_t string_length)
+  {
+  // Warning:  field_from_string uses charmap_t, so you can't safely feed it
+  // the results of __gg__iconverter without copying them.
+
+  // The string has to be in the field->encoding.  It's legitimate for
+  // string_length to be less than field_s; we will right fill with spaces. And
+  // it can be greater than field_s, in which case __gg__move will truncate.
+
+  cblc_field_t source = {};
+  source.type = FldAlphanumeric;
+  source.encoding = field->encoding;
+  source.data = reinterpret_cast<unsigned char *>
+                                                (const_cast<char *>(string)),
+  source.capacity = string_length;
+  __gg__move(  field, field_o, field_s,
+              &source, source.offset, source.capacity,
+              0, truncation_e );
+  }
+
+static void
+field_from_ascii(cblc_field_t *field, char *psz)
+  {
+  cblc_field_t source = {};
+  source.type = FldAlphanumeric;
+  source.capacity = strlen(psz);
+  source.data = reinterpret_cast<unsigned char *>(psz);
+  source.encoding = __gg__console_encoding;
+  __gg__move(  field, field->offset, field->capacity,
+             &source, source.offset, source.capacity,
+             0, truncation_e );
+  }
+
+extern "C"
+void
+__gg__get_date_yymmdd(cblc_field_t *field)
   {
   char ach[32];
 
@@ -2137,42 +2257,27 @@ __gg__get_date_yymmdd(const cblc_field_t *field)
           local->tm_year  % 100,
           local->tm_mon+1 % 100,
           local->tm_mday  % 100 );
-  size_t charsout;
-  const char *converted = __gg__iconverter(__gg__console_encoding,
-                                           field->encoding,
-                                           ach,
-                                           strlen(ach),
-                                           &charsout);
-  return strdup(converted);
+  field_from_ascii(field, ach);
   }
 
 extern "C"
-char *
-__gg__get_date_yyyymmdd(const cblc_field_t *field)
+void
+__gg__get_date_yyyymmdd(cblc_field_t *field)
   {
   char ach[32];
-
   time_t t = cobol_time();
   const struct tm *local = localtime(&t);
-
   sprintf(ach,
           "%4.4d%2.2d%2.2d",
           local->tm_year + 1900,
           local->tm_mon+1,
           local->tm_mday);
-
-  size_t charsout;
-  const char *converted = __gg__iconverter(__gg__console_encoding,
-                                           field->encoding,
-                                           ach,
-                                           strlen(ach),
-                                           &charsout);
-  return strdup(converted);
+  field_from_ascii(field, ach);
   }
 
 extern "C"
-char *
-__gg__get_date_yyddd(const cblc_field_t *field)
+void
+__gg__get_date_yyddd(cblc_field_t *field)
   {
   char ach[32];
 
@@ -2183,19 +2288,12 @@ __gg__get_date_yyddd(const cblc_field_t *field)
           "%2.2d%3.3d",
           local->tm_year % 100,
           local->tm_yday+1);
-
-  size_t charsout;
-  const char *converted = __gg__iconverter(__gg__console_encoding,
-                                           field->encoding,
-                                           ach,
-                                           strlen(ach),
-                                           &charsout);
-  return strdup(converted);
+  field_from_ascii(field, ach);
   }
 
 extern "C"
-char *
-__gg__get_yyyyddd(const cblc_field_t *field)
+void
+__gg__get_yyyyddd(cblc_field_t *field)
   {
   char ach[32];
 
@@ -2206,19 +2304,12 @@ __gg__get_yyyyddd(const cblc_field_t *field)
           "%4.4d%3.3d",
           local->tm_year + 1900,
           local->tm_yday+1);
-
-  size_t charsout;
-  const char *converted = __gg__iconverter(__gg__console_encoding,
-                                           field->encoding,
-                                           ach,
-                                           strlen(ach),
-                                           &charsout);
-  return strdup(converted);
+  field_from_ascii(field, ach);
   }
 
 extern "C"
-char *
-__gg__get_date_dow(const cblc_field_t *field)
+void
+__gg__get_date_dow(cblc_field_t *field)
   {
   char ach[32];
 
@@ -2228,14 +2319,7 @@ __gg__get_date_dow(const cblc_field_t *field)
   sprintf(ach,
           "%1.1d",
           local->tm_wday == 0 ? 7 : local->tm_wday);
-
-  size_t charsout;
-  const char *converted = __gg__iconverter(__gg__console_encoding,
-                                           field->encoding,
-                                           ach,
-                                           strlen(ach),
-                                           &charsout);
-  return strdup(converted);
+  field_from_ascii(field, ach);
   }
 
 static int
@@ -2334,11 +2418,10 @@ __gg__clock_gettime(struct cbl_timespec *tp)
   }
 
 extern "C"
-char *
-__gg__get_date_hhmmssff(const cblc_field_t *field)
+void
+__gg__get_date_hhmmssff(cblc_field_t *field)
   {
   char ach[32];
-
   struct cbl_timespec tv;
   __gg__clock_gettime(&tv);
 
@@ -2359,23 +2442,54 @@ __gg__get_date_hhmmssff(const cblc_field_t *field)
           tm.tm_min,
           tm.tm_sec,
           hundredths);
+  field_from_ascii(field, ach);
+  }
 
-  size_t charsout;
-  const char *converted = __gg__iconverter(__gg__console_encoding,
-                                           field->encoding,
-                                           ach,
-                                           strlen(ach),
-                                           &charsout);
-  return strdup(converted);
+static
+uint32_t collation_position( cbl_char_t ch )
+  {
+  uint32_t retval;
+  if( (ch & 0xFFFFFF00) == 0x00000000 )
+    {
+    // The character fits into the current DISPLAY collation
+    retval = collated(ch);
+    }
+  else
+    {
+    // It doesn't fit, so use the character value itself
+    retval = ch;
+    }
+  return retval;
+  }
+
+static cbl_char_t
+uber_compare(cbl_char_t ch_left, cbl_char_t ch_right)
+  {
+  if( ((ch_left | ch_right) & 0xFFFFFF00) == 0x00000000 )
+    {
+    // This is where collation is going to have to be fixed for multi-byte
+    // encodings.  For now, if both characters fit into 0xFF, then we will
+    // use the current collation.  Otherwise, we just compare them
+
+    // Both characters fit into the current DISPLAY codeset, so assume we
+    // are using the DISPLAY collation:
+    ch_left  = collated(ch_left);
+    ch_right = collated(ch_right);
+    }
+  else
+    {
+    // Just compare the raw characters.
+    }
+  cbl_char_t retval = ch_left - ch_right;
+  return retval;
   }
 
+
 extern "C"
 int
 __gg__setop_compare(
-  const char *candidate,
-  int capacity,
-  char *domain,
-  cbl_encoding_t domain_encoding)
+  const cblc_field_t *candidate_field,
+  char *domain)
   {
   // This routine is called to compare the characters of 'candidate'
   // against the list of character pairs in 'domain'
@@ -2385,23 +2499,34 @@ __gg__setop_compare(
   int h;
   char *d;
 
-  /*  At the present writing, the domain was created in "source code" space,
-      meaning that 'A' comes through as 0x41 no matter what the
-      domain->encoding.  Until Jim gets around to providing me with target
-      values from the parser, we're doing the conversion here. */
+  /* The domain was created by converting characters to their UTF32
+     equivalents and then turning that information to hex.  Numerical values,
+     which represent collation positions, are flagged as negative values.
+
+     In order to compare the apples in candidate to the UTF32 values in the
+     domain, we need to convert the candidate to UTF32 as well: */
 
-  charmap_t *charmap = __gg__get_charmap(domain_encoding);
+  const charmap_t *charmap = __gg__get_charmap(DEFAULT_32_ENCODING);
 
-  for(int i=0; i<capacity; i++)
+  size_t nbytes_converted;
+  const char *candidate = __gg__iconverter(candidate_field->encoding,
+                                            DEFAULT_32_ENCODING,
+                                            candidate_field->data,
+                                            candidate_field->capacity,
+                                            &nbytes_converted);
+  const char *candidate_end = candidate + nbytes_converted;
+  while(candidate < candidate_end)
     {
-    int ch = (*candidate++ & 0xFF);
+    cbl_char_t ch = charmap->getch(candidate, size_t(0));
+    candidate += charmap->stride();
+    int collation_pos = collation_position(ch);
     d = domain;
     while(*d)
       {
       retval = 0;
       // We are decoding hexadecimal numbers, either in pairs,
       // or singletons:  "20/30 " or "20 ".  The final one is
-      // terminated with '\0'
+      // terminated with ' \0'
 
       // See the comments in genapi.cc::get_class_condition_string
       // to see how this string was encoded.
@@ -2409,7 +2534,15 @@ __gg__setop_compare(
       l = (int)strtoll(d, reinterpret_cast<char **>(&d), 16);
       if( l < 0 )
         {
+        // This is a collation position, as given in the COBOL program. Make
+        // it positive, and subtract 1 from it to make it the same space
+        // as the collation table:
         l = -l;
+        l -= 1;
+        }
+      else
+        {
+
         }
       h = l;
       if( *d == '/' )
@@ -2418,7 +2551,9 @@ __gg__setop_compare(
         h = (int)strtoll(d, reinterpret_cast<char **>(&d), 16);
         if( h < 0 )
           {
+          // This is a collation position; make it the same as
           h = -h;
+          h -= 1;
           }
         }
       else if( *d == ' ' )
@@ -2426,9 +2561,7 @@ __gg__setop_compare(
         d += 1;
         }
 
-      l = charmap->mapped_character(l);
-      h = charmap->mapped_character(h);
-      if( ch >= l && ch <= h )
+      if( collation_pos >= l && collation_pos <= h )
         {
         // This character is acceptable
         retval = 1;
@@ -2551,13 +2684,15 @@ __gg__dirty_to_binary(const char *dirty,
   // MAX_FIXED_POINT_DIGITS
 
   charmap_t *charmap    = __gg__get_charmap(encoding);
-  int mapped_minus          = charmap->mapped_character(ascii_minus);
-  int mapped_plus           = charmap->mapped_character(ascii_plus);
-  int mapped_decimal_point  = charmap->mapped_character(__gg__decimal_point);
-  int mapped_0              = charmap->mapped_character(ascii_0);
-  int mapped_9              = charmap->mapped_character(ascii_9);
-  int mapped_E              = charmap->mapped_character(ascii_E);
-  int mapped_e              = charmap->mapped_character(ascii_e);
+  int stride = charmap->stride();
+
+  cbl_char_t mapped_minus          = charmap->mapped_character(ascii_minus);
+  cbl_char_t mapped_plus           = charmap->mapped_character(ascii_plus);
+  cbl_char_t mapped_decimal_point  = charmap->mapped_character(__gg__decimal_point);
+  cbl_char_t mapped_0              = charmap->mapped_character(ascii_0);
+  cbl_char_t mapped_9              = charmap->mapped_character(ascii_9);
+  cbl_char_t mapped_E              = charmap->mapped_character(ascii_E);
+  cbl_char_t mapped_e              = charmap->mapped_character(ascii_e);
 
   __int128 retval = 0;
 
@@ -2571,11 +2706,13 @@ __gg__dirty_to_binary(const char *dirty,
   int delta_r = 0;
 
   // We now loop over the remaining input characters:
-  unsigned char ch = '\0';
+  cbl_char_t ch = '\0';
+  size_t chindex = 0;
 
-  if(length-- > 0)
+  if(length > 0)
     {
-    ch = *dirty++;
+    length -= stride;
+    ch = charmap->getch(dirty, &chindex);
     if( ch == mapped_minus )
       {
       hyphen = 1;
@@ -2606,9 +2743,10 @@ __gg__dirty_to_binary(const char *dirty,
       }
     }
 
-  while( length-- > 0 )
+  while( length > 0 )
     {
-    ch = *dirty++;
+    length -= stride;
+    ch = charmap->getch(dirty, &chindex);
     if( ch == mapped_decimal_point && delta_r == 0 )
       {
       // This is the first decimal point we've seen, so we
@@ -2642,22 +2780,23 @@ __gg__dirty_to_binary(const char *dirty,
     int exponent_sign = 1;
     if( length > 0  )
       {
-      ch = *dirty;
+      ch = charmap->getch(dirty, chindex);
       if( ch == mapped_plus)
         {
-        length -= 1;
-        dirty += 1;
+        length -= stride;
+        dirty += stride;
         }
       else if( ch == mapped_minus )
         {
         exponent_sign = -1;
-        length -= 1;
-        dirty += 1;
+        length -= stride;
+        dirty += stride;
         }
       }
-    while(length-- > 0)
+    while(length > 0)
       {
-      ch = *dirty++;
+      length -= stride;
+      ch = charmap->getch(dirty, &chindex);
       if(    ch < mapped_0
           || ch > mapped_9 )
         {
@@ -2730,29 +2869,37 @@ __gg__dirty_to_float( const char *dirty,
   int delta_r = 0;
 
   // We now loop over the remaining input characters:
-  unsigned char ch = '\0';
+  cbl_char_t ch = '\0';
 
   charmap_t *charmap = __gg__get_charmap(field->encoding);
-
+  cbl_char_t mapped_minus  = charmap->mapped_character(ascii_minus);
+  cbl_char_t mapped_plus   = charmap->mapped_character(ascii_plus);
+  cbl_char_t mapped_decimal = charmap->mapped_character(__gg__decimal_point);
+  cbl_char_t mapped_0       = charmap->mapped_character(ascii_0);
+  cbl_char_t mapped_9       = charmap->mapped_character(ascii_9);
+  cbl_char_t mapped_E       = charmap->mapped_character(ascii_E);
+  cbl_char_t mapped_e       = charmap->mapped_character(ascii_e);
+
+  size_t index = 0;
   if(length-- > 0)
     {
-    ch = *dirty++;
-    if( ch == charmap->mapped_character(ascii_minus) )
+    ch = charmap->getch(dirty, &index);
+    if( ch == mapped_minus )
       {
       hyphen = 1;
       }
-    else if( ch == charmap->mapped_character(ascii_plus) )
+    else if( ch == mapped_plus )
       {
       // A plus sign is okay
       }
-    else if( ch == charmap->mapped_character(__gg__decimal_point) )
+    else if( ch == mapped_decimal )
       {
       delta_r = 1;
       }
-    else if(   ch >= charmap->mapped_character(ascii_0)
-            && ch <= charmap->mapped_character(ascii_9) )
+    else if(   ch >= mapped_0
+            && ch <= mapped_9 )
       {
-      retval = ch - charmap->mapped_character(ascii_0) ;
+      retval = ch & 0x0F ;
       }
     else
       {
@@ -2766,40 +2913,40 @@ __gg__dirty_to_float( const char *dirty,
 
   while( length-- > 0 )
     {
-    ch = *dirty++;
-    if( ch == charmap->mapped_character(__gg__decimal_point) && delta_r == 0 )
+    ch = charmap->getch(dirty, &index);
+    if( ch == mapped_decimal && delta_r == 0 )
       {
       // This is the first decimal point we've seen, so we
       // can start counting rdigits:
       delta_r = 1;
       continue;
       }
-    if(   ch < charmap->mapped_character(ascii_0)
-       || ch > charmap->mapped_character(ascii_9) )
+    if(   ch < mapped_0
+       || ch > mapped_9 )
       {
       // When we hit something that isn't a digit, then we are done
       break;
       }
     retval *= 10;
-    retval += ch - charmap->mapped_character(ascii_0) ;
+    retval += ch & 0x0F ;
     rdigits += delta_r;
     }
 
   // Let's check for an exponent:
   int exponent = 0;
-  if(    ch == charmap->mapped_character(ascii_E)
-      || ch == charmap->mapped_character(ascii_e)  )
+  if(    ch == mapped_E
+      || ch == mapped_e )
     {
     int exponent_sign = 1;
     if( length > 0  )
       {
-      ch = *dirty;
-      if( ch == charmap->mapped_character(ascii_plus) )
+      ch = charmap->getch(dirty, &index);
+      if( ch == mapped_plus )
         {
         length -= 1;
         dirty += 1;
         }
-      else if (ch == charmap->mapped_character(ascii_minus) )
+      else if(ch == mapped_minus )
         {
         exponent_sign = -1;
         length -= 1;
@@ -2808,15 +2955,15 @@ __gg__dirty_to_float( const char *dirty,
       }
     while(length-- > 0)
       {
-      ch = *dirty++;
-      if(   ch < charmap->mapped_character(ascii_0)
-         || ch > charmap->mapped_character(ascii_9) )
+      ch = charmap->getch(dirty, &index);
+      if(   ch < mapped_0
+         || ch > mapped_9 )
         {
         // When we hit something that isn't a digit, then we are done
         break;
         }
       exponent *= 10;
-      exponent += ch - charmap->mapped_character(ascii_0) ;
+      exponent += ch & 0x0F ;
       }
     exponent *= exponent_sign;
     }
@@ -2926,7 +3073,7 @@ format_for_display_internal(char **dest,
 
   // The routine returns the cbl_encoding_t of the result.
 
-  cbl_encoding_t retval = var->encoding;
+  cbl_encoding_t enc_dest = var->encoding;
 
   int source_rdigits = var->rdigits;
 
@@ -2940,32 +3087,67 @@ format_for_display_internal(char **dest,
   if( address_of )
     {
     // Assume that DISPLAY OF ADDRESS OF should be what's expected:
-
-    __gg__realloc_if_necessary(dest, dest_size, 2*sizeof(void *) + 1);
+    const charmap_t *charmap = __gg__get_charmap(enc_dest);
+    __gg__realloc_if_necessary(dest,
+                               dest_size,
+                               2*sizeof(void *) + charmap->stride());
 
     sprintf(  *dest,
               "0x%*.*lx",
               (int)(2*sizeof(void *)),
               (int)(2*sizeof(void *)),
               (unsigned long)actual_location);
-    retval = __gg__console_encoding;
+    enc_dest = __gg__console_encoding;
     goto done;
     }
 
   switch( var->type )
     {
     case FldLiteralA:
+      {
+      charmap_t *charmap = __gg__get_charmap(enc_dest);
+      __gg__realloc_if_necessary(dest,
+                                 dest_size,
+                                 actual_length+charmap->stride());
+
+      cbl_figconst_t figconst = (cbl_figconst_t)(var->attr & FIGCONST_MASK);
+      if( figconst )
+        {
+        charmap = __gg__get_charmap(enc_dest);
+        int figconst_char  = charmap->figconst_character(figconst);
+        memset(*dest, figconst_char, actual_length);
+        (*dest)[actual_length] = NULLCH;
+        }
+      else
+        {
+        if( actual_location )
+          {
+          memcpy(*dest, actual_location, actual_length);
+          }
+        else
+          {
+          fprintf(stderr, "attempting to display a NULL pointer in %s\n", var->name);
+          abort();
+          }
+        (*dest)[actual_length] = NULLCH;
+        }
+      break;
+      }
+
     case FldGroup:
     case FldAlphanumeric:
     case FldNumericEdited:
     case FldAlphaEdited:
       {
-      __gg__realloc_if_necessary(dest, dest_size, actual_length+1);
+      charmap_t *charmap = __gg__get_charmap(enc_dest);
+      __gg__realloc_if_necessary(dest,
+                                 dest_size,
+                                 actual_length+charmap->stride());
 
       cbl_figconst_t figconst = (cbl_figconst_t)(var->attr & FIGCONST_MASK);
       if( figconst )
         {
-        charmap_t *charmap = __gg__get_charmap(retval);
+        charmap = __gg__get_charmap(enc_dest);
         int figconst_char  = charmap->figconst_character(figconst);
         memset(*dest, figconst_char, actual_length);
         (*dest)[actual_length] = NULLCH;
@@ -2988,11 +3170,14 @@ format_for_display_internal(char **dest,
 
     case FldNumericDisplay:
       {
+      charmap_t *charmap = __gg__get_charmap(enc_dest);
       if( var_is_refmod(var) )
         {
         // Because we are dealing with a refmod, we just output those
         // characters.
-        __gg__realloc_if_necessary(dest, dest_size, actual_length+1);
+        __gg__realloc_if_necessary(dest,
+                                   dest_size,
+                                   actual_length+charmap->stride());
         memcpy((*dest), actual_location, actual_length);
         (*dest)[actual_length] = NULLCH;
         break;
@@ -3030,15 +3215,15 @@ format_for_display_internal(char **dest,
         // This buffer is larger than can validly be needed
         unsigned char converted[128];
         size_t outlength;
-        retval = DEFAULT_SOURCE_ENCODING;
+        enc_dest = DEFAULT_SOURCE_ENCODING;
         const char *mapped = __gg__iconverter(
                                   var->encoding,
-                                  retval,
+                                  enc_dest,
                                   PTRCAST(char, actual_location),
                                   actual_length,
                                   &outlength);
         memcpy(converted, mapped, outlength);
-        charmap_t *charmap = __gg__get_charmap(retval);
+        charmap = __gg__get_charmap(enc_dest);
 
         // converted[] is now an ASCII version of the value in memory.  We are
         // going to "validate" the characters, which might be garbage.
@@ -3052,7 +3237,6 @@ format_for_display_internal(char **dest,
         bool is_negative;
         int index = 0;  // This is the running index into our output destination
 
-        std::ptrdiff_t signoffset;
         switch(signtype)
           {
           case 0:
@@ -3062,14 +3246,19 @@ format_for_display_internal(char **dest,
             // not signable
             signloc  = converted;
             digits   = converted;
-            digits_e = converted + actual_length;
+            digits_e = converted + outlength;
             is_negative = false;
             break;
           case 4:
+            {
             // internal trailing
-            signloc  = converted + actual_length-1;
+            const charmap_t *charmap_from = __gg__get_charmap(var->encoding);
+            cbl_char_t original_sign_digit =
+                charmap_from->getch(actual_location,
+                                    actual_length - charmap_from->stride());
+            signloc  = converted + outlength-1;
             digits   = converted;
-            digits_e = converted + actual_length;
+            digits_e = converted + outlength;
             /*  In ascii, negative is indicated by turning bit 0x40 on.
                 In ebcdic, by turning bit 0x20 off.  In both cases, the result
                 is outside of the range '0' through '9'.  Working this way is
@@ -3078,32 +3267,35 @@ format_for_display_internal(char **dest,
                 variable's memory.  I am not overly concerned.
             */
             is_negative = *signloc > ascii_9 || *signloc < ascii_0;
-            signoffset = signloc-converted;
-            *signloc = charmap->mapped_character(ascii_0)
-                                        + (actual_location[signoffset] & 0x0F);
+            *signloc = ascii_0 + (original_sign_digit & 0x0F);
             break;
+            }
           case 5:
+            {
             // internal leading
+            const charmap_t *charmap_from = __gg__get_charmap(var->encoding);
+            cbl_char_t original_sign_digit =
+                charmap_from->getch(actual_location,
+                                    (size_t)0);
             signloc  = converted;
             digits   = converted;
-            digits_e = converted + actual_length;
+            digits_e = converted + outlength;
             is_negative = *signloc > ascii_9 || *signloc < ascii_0;
-            signoffset = signloc-converted;
-            *signloc = charmap->mapped_character(ascii_0)
-                                        + (actual_location[signoffset] & 0x0F);
+            *signloc = ascii_0 + (original_sign_digit & 0x0F);
             break;
+            }
           case 6:
             // separate trailing
-            signloc  = converted + actual_length-1;
+            signloc  = converted + outlength-1;
             digits   = converted;
-            digits_e = converted + actual_length-1;
+            digits_e = converted + outlength-1;
             is_negative = *signloc == ascii_minus;
             break;
           case 7:
             // separate leading
             signloc  = converted;
             digits   = converted+1;
-            digits_e = converted + actual_length;
+            digits_e = converted + outlength;
             is_negative = *signloc == ascii_minus;
             break;
           }
@@ -3139,6 +3331,7 @@ format_for_display_internal(char **dest,
             break;
           case 6:
             // separate trailing
+            // We'll stick on the trailing sign character later
             break;
           case 7:
             // separate leading
@@ -3260,8 +3453,8 @@ format_for_display_internal(char **dest,
         }
 
       char ach[128];
-      retval = DEFAULT_SOURCE_ENCODING;
-      charmap_t *charmap = __gg__get_charmap(retval);
+      enc_dest = DEFAULT_SOURCE_ENCODING;
+      charmap_t *charmap = __gg__get_charmap(enc_dest);
 
       __gg__binary_to_string_ascii(ach, digits, value);
 
@@ -3317,7 +3510,7 @@ format_for_display_internal(char **dest,
       sprintf(ach, "%lu", (unsigned long)value);
       __gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1);
       strcpy(*dest, ach);
-      retval = __gg__console_encoding;
+      enc_dest = __gg__console_encoding;
       }
     break;
 
@@ -3336,7 +3529,7 @@ format_for_display_internal(char **dest,
         memset(*dest, 0, retsize);
         strcpy(*dest, "<LEVEL88>");
         }
-      retval = __gg__console_encoding;
+      enc_dest = __gg__console_encoding;
       break;
       }
 
@@ -3354,7 +3547,7 @@ format_for_display_internal(char **dest,
                 (int)(2*sizeof(void *)),
                 (int)(2*sizeof(void *)),
                 (unsigned long)value);
-      retval = __gg__console_encoding;
+      enc_dest = __gg__console_encoding;
       break;
       }
 
@@ -3488,7 +3681,7 @@ format_for_display_internal(char **dest,
           break;
           }
         }
-      retval = __gg__console_encoding;
+      enc_dest = __gg__console_encoding;
       break;
       }
 
@@ -3502,7 +3695,7 @@ format_for_display_internal(char **dest,
 
   if( (var->attr & scaled_e) && var->type != FldNumericDisplay )
     {
-    charmap_t *charmap = __gg__get_charmap(retval);
+    charmap_t *charmap = __gg__get_charmap(enc_dest);
 
     static size_t buffer_size = MINIMUM_ALLOCATION_SIZE;
     static char  *buffer = static_cast<char *>(malloc(buffer_size));
@@ -3569,106 +3762,129 @@ format_for_display_internal(char **dest,
     }
 
   done:
-  if( retval == custom_encoding_e )
+  if( enc_dest == custom_encoding_e )
     {
     fprintf(stderr, "Bum encoding in format_for_display_internal\n");
     abort();
     }
-  return retval;
+  return enc_dest;
   }
 
 static int
 compare_88( const char    *list,
             const char    *list_e,
             bool           fig_const,
-      const cblc_field_t  *conditional,
-            unsigned char *conditional_location,
-            int            conditional_length)
+      const cblc_field_t  *conditional_,
+      const unsigned char *conditional_location_,
+            int            conditional_length_)
   {
-  charmap_t *charmap = __gg__get_charmap(conditional->encoding);
-  int list_len = (int)(list_e-list);
+  int cmpval;
+
+  // We know that list through list_e are characters in UTF32 encoding.
+  size_t list_len = list_e-list;
+
+  // We need to convert the conditional to be UTF32 as well:
+  charmap_t *charmap = __gg__get_charmap(DEFAULT_32_ENCODING);
+  size_t stride = charmap->stride();
+  cbl_char_t mapped_space = charmap->mapped_character(ascii_space);
+
+  // First, convert the conditional to UTF32
+  size_t conditional_length=0;
+  char * conditional_i = __gg__miconverter(
+                                conditional_->encoding,
+                                DEFAULT_32_ENCODING,
+                                conditional_location_,
+                                conditional_length_,
+                                &conditional_length);
+  const char *conditional = conditional_i;
+
+  // Now we want to trim away trailing spaces from the conditional, leaving
+  // just one so that we don't get down to an empty string.
+  while( conditional_length > stride)
+    {
+    cbl_char_t ch = charmap->getch(conditional, conditional_length - stride);
+    if( ch != mapped_space )
+      {
+      break;
+      }
+    conditional_length -= stride;
+    }
+
+  // We have conditional_length bytes at conditional.  Create a test area that
+  // we will compare against conditional:
+
   int test_len;
   char *test;
   if( fig_const )
     {
-    // We are working with a figurative constant
+    // The 'list' is a figurative constant, so we need to create a test
+    // buffer that is all the character designated by the figurative constant.
 
     test = static_cast<char *>(malloc(conditional_length));
     massert(test);
     test_len = conditional_length;
+
     // This is where we handle the zero-length strings that
     // nonetheless can magically be expanded into figurative
     // constants:
 
-    int ch = charmap->mapped_character(ascii_space);
+    // We default to space, since we know that the figurative constant is
+    // S, Z, H, Q, or L
+    cbl_char_t ch = charmap->mapped_character(ascii_space);
     // Check for the strings starting with 0xFF whose second character
     // indicates a figurative constant:
-    if( list[0] == ascii_Z )
+    cbl_char_t char_0 = charmap->getch(list, (size_t)0);
+    if( char_0 == charmap->mapped_character(ascii_Z) )
       {
       ch = charmap->mapped_character(ascii_0);
       }
-    else if( list[0] == ascii_H )
+    else if( char_0 == charmap->mapped_character(ascii_H) )
       {
       ch = charmap->high_value_character();
       }
-    else if( list[0] == ascii_Q )
+    else if( char_0 == charmap->mapped_character(ascii_Q) )
       {
       ch = charmap->quote_character();
       }
-    else if( list[0] == ascii_L )
+    else if( char_0 == charmap->mapped_character(ascii_L) )
       {
       ch = charmap->low_value_character();
       }
-    memset( test, ch, conditional_length );
+    // The test location is full of the figurative constant
+    charmap->memset( test, ch, conditional_length );
     }
   else if( list_len < conditional_length )
     {
-    // 'list' is too short; we have to right-fill with spaces:
+    // 'list' element is too short; we have to right-fill with spaces:
     test = static_cast<char *>(malloc(conditional_length));
     massert(test);
     test_len = conditional_length;
-    memset(test, charmap->mapped_character(ascii_space), conditional_length);
+    // Copy over the shorty string from 'list'
     memcpy(test, list, list_len);
+    // Right fill with spaces:
+    charmap->memset(test+list_len,
+                    charmap->mapped_character(ascii_space),
+                    conditional_length-list_len);
     }
   else
     {
+    // list_len is >= conditional length.  Presumably the parser ensured that
+    // the list element couldn't be bigger than the maximum condition length,
+    // we we'll truncate at list_len:
     test = static_cast<char *>(malloc(list_len));
     massert(test);
     test_len = list_len;
     memcpy(test, list, list_len);
     }
 
-  int cmpval;
-
-  if( test[0] == NULLCH && conditional_location[0] == 0)
-    {
-    cmpval = 0;
-    }
-  else
-    {
-    cmpval = cstrncmp (test,
-                      PTRCAST(char, conditional_location),
-                      conditional_length);
+  // At this point we have conditional and test, and they both have at least
+  // test_len bytes.
 
-//    if( cmpval == 0 && (int)strlen(test) != conditional_length )
-    if( cmpval == 0 && test_len != conditional_length )
-      {
-      // When strncmp returns 0, the actual smaller string is the
-      // the shorter of the two:
-      cmpval = test_len - conditional_length;
-      }
-    }
+  cmpval = memcmp(test, conditional, test_len);
 
   free(test);
+  free(conditional_i);
 
-  if( cmpval < 0 )
-    {
-    cmpval = -1;
-    }
-  else if(cmpval > 0)
-    {
-    cmpval = +1;
-    }
   return cmpval;
   }
 
@@ -3697,40 +3913,67 @@ get_float128( const cblc_field_t *field,
     }
   else if( field->type == FldLiteralN )
     {
-    if( __gg__decimal_point == '.' )
+    union
       {
-      size_t charsout;
-      char *converted = __gg__iconverter(field->encoding,
-                                         DEFAULT_SOURCE_ENCODING,
-                                         field->initial,
-                                         strlen(field->initial),
-                                         &charsout);
-      retval = strtofp128(converted, NULL);
-      }
-    else
+      __int128 i128;
+      uint64_t u64;
+      uint32_t u32;
+      uint16_t u16;
+      uint8_t  u8 ;
+      int64_t  i64;
+      int32_t  i32;
+      int16_t  i16;
+      int8_t   i8 ;
+      };
+    i128 = 0;
+    memcpy(&i128, field->data, field->capacity);
+
+    if( field->attr & signable_e )
       {
-      // We need to replace any commas with periods
-      static size_t size = 128;
-      static char *buffer = static_cast<char *>(malloc(size));
-      while( strlen(field->initial)+1 > size )
+      switch(field->capacity)
         {
-        size *= 2;
-        buffer = static_cast<char *>(malloc(size));
+        case 16:
+          retval = i128;
+          break;
+        case 8:
+          retval = i64;
+          break;
+        case 4:
+          retval = i32;
+          break;
+        case 2:
+          retval = i16;
+          break;
+        case 1:
+          retval = i8;
+          break;
         }
-      massert(buffer);
-      strcpy(buffer, field->initial);
-      char *p = strchr(buffer, ',');
-      if(p)
+      }
+    else
+      {
+      switch(field->capacity)
         {
-        *p = '.';
+        case 16:
+          retval = i128;
+          break;
+        case 8:
+          retval = u64;
+          break;
+        case 4:
+          retval = u32;
+          break;
+        case 2:
+          retval = u16;
+          break;
+        case 1:
+          retval = u8;
+          break;
         }
-      retval = strtofp128(buffer, NULL);
       }
-    }
-  else
-    {
-    fprintf(stderr, "What's all this then?\n");
-    abort();
+    if( field->rdigits )
+      {
+      retval /= __gg__power_of_ten( field->rdigits );
+      }
     }
   return retval;
   }
@@ -3746,8 +3989,12 @@ compare_field_class(const cblc_field_t  *conditional,
   __int128 value;
   int rdigits;
 
-  // list->initial points to a superstring: a double-null terminated
-  // string containing pairs of strings.  We are looking for equality.
+  charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+  int stride32 = charmap32->stride();
+  cbl_char_t mapped_F = charmap32->mapped_character(ascii_F);
+  cbl_char_t mapped_Z = charmap32->mapped_character(ascii_Z);
+
+  // We are disassembling strings that have the form <length><flag><value>
 
   switch( conditional->type )
     {
@@ -3762,34 +4009,36 @@ compare_field_class(const cblc_field_t  *conditional,
                                       conditional,
                                       conditional_location,
                                       conditional_length);
-      const char *walker = list->initial;
+      char *walker = list->initial;
       while(*walker)
         {
-        char   left_flag;
+        cbl_char_t left_flag;
         size_t left_len;
-        char * left;
+        char *left;
 
-        char   right_flag;
+        cbl_char_t right_flag;
         size_t right_len;
-        char * right;
+        char *right;
 
         char *pend;
-        left_len = strtoull(walker, &pend, 10);
-        left_flag = *pend;
-        left = pend+1;
 
-        right = left + left_len;
-        right_len = strtoull(right, &pend, 10);
-        right_flag = *pend;
-        right = pend+1;
+        left_len = charmap32->strtoull(walker, &pend, 10);
+        left_flag = charmap32->getch(pend, (size_t)0);
+        left = pend+stride32;
+
+        right = left + left_len*stride32;
+        right_len = charmap32->strtoull(right, &pend, 10);
+        right_flag = charmap32->getch(pend, (size_t)0);
+        right = pend+stride32;
 
-        walker = right + right_len;
+        walker = right + right_len*stride32;
 
         int left_rdigits;
         int right_rdigits;
 
         __int128 left_value;
-        if( left_flag == 'F' && left[0] == 'Z' )
+        cbl_char_t left_0 = charmap32->getch(left, size_t(0));
+        if( left_flag == mapped_F && left_0 == mapped_Z )
           {
           left_value = 0;
           left_rdigits = 0;
@@ -3798,13 +4047,14 @@ compare_field_class(const cblc_field_t  *conditional,
           {
           left_value = __gg__dirty_to_binary(
                                   left,
-                                  conditional->encoding,
-                                  left_len,
+                                  DEFAULT_32_ENCODING,
+                                  left_len*stride32,
                                   &left_rdigits);
           }
 
         __int128 right_value;
-        if( right_flag == 'F' && right[0] == 'Z' )
+        cbl_char_t right_0 = charmap32->getch(right, size_t(0));
+        if( right_flag == ascii_F && right_0 == mapped_Z )
           {
           right_value = 0;
           right_rdigits = 0;
@@ -3813,8 +4063,8 @@ compare_field_class(const cblc_field_t  *conditional,
           {
           right_value = __gg__dirty_to_binary(
                                    right,
-                                   conditional->encoding,
-                                   right_len,
+                                   DEFAULT_32_ENCODING,
+                                   right_len*stride32,
                                    &right_rdigits);
           }
 
@@ -3847,6 +4097,8 @@ compare_field_class(const cblc_field_t  *conditional,
     case FldAlphanumeric:
     case FldLiteralA:
       {
+      // This is an alphanumeric comparison.  The list is in UTF32, so we
+      // are going to have to convert the conditional to UTF32.
       char *walker = list->initial;
       while(*walker)
         {
@@ -3861,18 +4113,22 @@ compare_field_class(const cblc_field_t  *conditional,
 
         char *pend;
 
+        cbl_char_t ch;
+
         first = walker;
-        first_len = strtoull(first, &pend, 10);
-        fig1 = *pend == 'F';
-        first = pend+1;
-        first_e = first + first_len;
+        first_len = charmap32->strtoull(first, &pend, 10);
+        ch = charmap32->getch(pend, (size_t)0);
+        fig1 = ch == mapped_F;
+        first = pend+stride32;
+        first_e = first + first_len*stride32;
 
         last = first_e;
 
-        last_len = strtoull(last, &pend, 10);
-        fig2 = *pend == 'F';
-        last = pend+1;
-        last_e = last + last_len;
+        last_len = charmap32->strtoull(last, &pend, 10);
+        ch = charmap32->getch(pend, (size_t)0);
+        fig2 = ch == mapped_F;
+        last = pend+stride32;
+        last_e = last + last_len*stride32;
 
         walker = last_e;
 
@@ -3910,32 +4166,38 @@ compare_field_class(const cblc_field_t  *conditional,
 
     case FldFloat:
       {
+      // We need a fake field to hold the encoding for the
+      // __gg__dirty_to_float() routine.
+      cblc_field_t fakir;
+      fakir.encoding = DEFAULT_32_ENCODING;
+
       GCOB_FP128 fp128 = get_float128(conditional, conditional_location) ;
-      const char *walker = list->initial;
+      char *walker = list->initial;
       while(*walker)
         {
-        char   left_flag;
+        cbl_char_t   left_flag;
         size_t left_len;
         char * left;
 
-        char   right_flag;
+        cbl_char_t   right_flag;
         size_t right_len;
         char * right;
 
         char *pend;
-        left_len = strtoull(walker, &pend, 10);
-        left_flag = *pend;
-        left = pend+1;
+        left_len = charmap32->strtoull(walker, &pend, 10);
+        left_flag = charmap32->getch(pend, (size_t)0);
+        left = pend+stride32;
 
-        right = left + left_len;
-        right_len = strtoull(right, &pend, 10);
-        right_flag = *pend;
-        right = pend+1;
+        right = left + left_len*stride32;
+        right_len = charmap32->strtoull(right, &pend, 10);
+        right_flag = charmap32->getch(pend, (size_t)0);
+        right = pend+stride32;
 
-        walker = right + right_len;
+        walker = right + right_len*stride32;
 
         GCOB_FP128 left_value;
-        if( left_flag == ascii_F && left[0] == ascii_Z )
+        if( left_flag == mapped_F
+            && charmap32->getch(left, (size_t)0) == mapped_Z )
           {
           left_value = 0;
           }
@@ -3943,11 +4205,12 @@ compare_field_class(const cblc_field_t  *conditional,
           {
           left_value = __gg__dirty_to_float(left,
                                             left_len,
-                                            conditional);
+                                            &fakir);
           }
 
         GCOB_FP128 right_value;
-        if( right_flag == 'F' && right[0] == 'Z' )
+        if( right_flag == mapped_F
+            && charmap32->getch(right, (size_t)0) == mapped_Z )
           {
           right_value = 0;
           }
@@ -3955,7 +4218,7 @@ compare_field_class(const cblc_field_t  *conditional,
           {
           right_value = __gg__dirty_to_float( right,
                                               right_len,
-                                              conditional);
+                                              &fakir);
           }
 
         if( left_value <= fp128 && fp128 <= right_value )
@@ -4036,102 +4299,213 @@ local_is_alpha(int type, bool address_of)
   return retval;
   }
 
+static void
+interconvert( char **allocated_left,
+              char **allocated_right,
+              char **left_string,
+              char **right_string,
+              size_t *left_length,
+              size_t *right_length,
+              cbl_encoding_t *encoding_left,
+              cbl_encoding_t *encoding_right)
+  {
+  // This routine looks at two encodings and decides what do to about comparing
+  // apples to apples.
+  *allocated_left  = nullptr;
+  *allocated_right = nullptr;
+
+  bool convert_left_to_right = false;
+  bool convert_right_to_left = false;
+
+  size_t converted_length;
+  const char *converted;
+  if( *encoding_left == *encoding_right )
+    {
+    // This is both the most-seen situation, and, happily, the easiest to
+    // handle.  We just do nothing.
+    }
+  else if(   *encoding_left  == __gg__national_encoding
+          || *encoding_right == __gg__national_encoding )
+    {
+    // The encodings are different, but at least one is the national encoding.
+    // Convert the other one to be national as well:
+    if( *encoding_left != __gg__national_encoding )
+      {
+      convert_left_to_right = true;
+      }
+    else
+      {
+      convert_right_to_left = true;
+      }
+    }
+  else
+    {
+    // We have two different encodings, and neither of them are national.  This
+    // can happen when a file descriptor has a specific codeset that doesn't
+    // match the national codeset.  We will convert the narrower to the wider;
+    // if they are both the same width we will pick one arbitrarily.
+    const charmap_t *charmap_left  = __gg__get_charmap(*encoding_left);
+    const charmap_t *charmap_right = __gg__get_charmap(*encoding_right);
+    if( charmap_right->stride() >= charmap_left->stride() )
+      {
+      convert_left_to_right = true;
+      }
+    else
+      {
+      convert_right_to_left = true;
+      }
+    }
+
+  if( convert_left_to_right )
+    {
+    // Convert the left side to the right encoding
+    converted = __gg__iconverter(*encoding_left,
+                                 *encoding_right,
+                                 *left_string,
+                                 *left_length,
+                                 &converted_length);
+    *encoding_left = *encoding_right ;
+    *allocated_left = static_cast<char *>(malloc(converted_length));
+    massert(*allocated_left);
+    *left_string = *allocated_left;
+    *left_length = converted_length;
+    memcpy(*left_string, converted, *left_length);
+    }
+  if( convert_right_to_left )
+    {
+    // Convert the right side to the left_encoding
+    converted = __gg__iconverter(*encoding_right,
+                                 *encoding_left,
+                                 *right_string,
+                                 *right_length,
+                                 &converted_length);
+    *encoding_right = *encoding_left ;
+    *allocated_right = static_cast<char *>(malloc(converted_length));
+    massert(*allocated_right);
+    *right_string = *allocated_right;
+    *right_length = converted_length;
+    memcpy(right_string, converted, *right_length);
+    }
+  }
+
 static
 int
-compare_strings(const char   *left_string,
-                size_t        left_length,
-                bool          left_all,
-                const char   *right_string,
-                size_t        right_length,
-                bool          right_all,
-                cbl_encoding_t encoding)
-  {
+compare_strings(char   *left_string,
+                size_t  left_length,
+                bool    left_all,
+                char   *right_string,
+                size_t  right_length,
+                bool    right_all,
+                cbl_encoding_t encoding_left,
+                cbl_encoding_t encoding_right)
+  {
+  // This routine compares two strings.  It sounds innocent enough, right? But
+  // we have to deal with different encodings.  It's not clear what the rules
+  // are, or should be, and collation is just a mess.  We are going to be
+  // playing Whac-A-Mole with this routine, possibly until the end of time.
+
+  char *allocated_left  = nullptr;
+  char *allocated_right = nullptr;
+
+  interconvert(&allocated_left,
+               &allocated_right,
+               &left_string,
+               &right_string,
+               &left_length,
+               &right_length,
+               &encoding_left,
+               &encoding_right);
+
+  charmap_t *charmap_left  = __gg__get_charmap(encoding_left);
+  charmap_t *charmap_right = __gg__get_charmap(encoding_right);
+
   int retval = 0;
-  size_t i = 0;
+  size_t index_left  = 0;
+  size_t index_right = 0;
 
   if( right_all && right_length > left_length )
     {
-    // In the rubber-bandy ALL situation, and the ALL is longer than the
-    // fixed side, we just compare the characters of the fixed side:
+    // If the right side is ALL, and is longer than the left side, we just
+    // compare the matching characters.
     right_length = left_length;
     }
 
   if( left_all && left_length > right_length )
     {
+    // If the left side is ALL, and is longer than the right side, we just
+    // compare the matching characters.
     left_length = right_length;
     }
 
-  while( !retval && i<left_length && i<right_length )
+  while( !retval && index_left<left_length && index_right<right_length )
     {
-    unsigned int chl = collated((unsigned char)left_string[i]);
-    unsigned int chr = collated((unsigned char)right_string[i]);
-    retval = chl - chr;
-    i += 1;
+    cbl_char_t ch_left  = charmap_left->getch(left_string, &index_left);
+    cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+    retval = uber_compare(ch_left, ch_right);
     }
 
   // We need to space-extend the shorter value.  That's because
   // "Bob" is equal to "Bob     "
   if( !right_all )
     {
-    charmap_t *charmap = __gg__get_charmap(encoding);
-    while( !retval && i<left_length )
+    while( !retval && index_left<left_length )
       {
-      retval = collated((unsigned char)left_string[i])
-               - collated(charmap->mapped_character(ascii_space));
-      i += 1;
+      cbl_char_t ch_left  = charmap_left->getch(left_string, &index_left);
+      cbl_char_t ch_right = charmap_right->mapped_character(ascii_space);
+      retval = uber_compare(ch_left, ch_right);
       }
     }
   else
     {
     // In an ALL situation where the ALL is shorter than the fixed side, we
     // wrap around the ALL characters
-    while( !retval && i<left_length )
+    while( !retval && index_left<left_length )
       {
-      retval = collated((unsigned char)left_string[i])
-               - collated((unsigned char)right_string[i%right_length]);
-      i += 1;
+      index_right %= right_length;
+      cbl_char_t ch_left  = charmap_left->getch(left_string, &index_left);
+      cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+      retval = uber_compare(ch_left, ch_right);
       }
     }
 
   if( !left_all )
     {
-    charmap_t *charmap = __gg__get_charmap(encoding);
-    while( !retval && i<right_length )
+    while( !retval && index_right<right_length )
       {
-      retval = collated(charmap->mapped_character(ascii_space))
-               - collated((unsigned char)right_string[i]);
-      i += 1;
+      cbl_char_t ch_left  = charmap_left->mapped_character(ascii_space);
+      cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+      retval = uber_compare(ch_left, ch_right);
       }
     }
   else
     {
-    if( left_length > right_length )
-      {
-      left_length = right_length;
-      }
-    while( !retval && i<right_length )
+    while( !retval && index_right<right_length )
       {
-      retval = collated((unsigned char)left_string[i%left_length])
-               - collated((unsigned char)right_string[i]);
-      i += 1;
+      index_left %= left_length;
+      cbl_char_t ch_left  = charmap_left->mapped_character(ascii_space);
+      cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+      retval = uber_compare(ch_left, ch_right);
       }
     }
+
+  free(allocated_right);
+  free(allocated_left);
   return retval;
   }
 
 extern "C"
 int
-__gg__compare_2(cblc_field_t *left_side,
-                unsigned char   *left_location,
-                size_t  left_length,
-                int     left_attr,
-                int     left_flags,
-                cblc_field_t *right_side,
-                unsigned char   *right_location,
-                size_t  right_length,
-                int     right_attr,
-                int     right_flags,
-                int     second_time_through)
+__gg__compare_2(cblc_field_t  *left_side,
+                unsigned char *left_location,
+                size_t         left_length,
+                uint64_t       left_attr,
+                int            left_flags,
+                cblc_field_t  *right_side,
+                unsigned char *right_location,
+                size_t         right_length,
+                uint64_t       right_attr,
+                int            right_flags,
+                int            second_time_through)
   {
   // First order of business:  If right_side is a FldClass, pass that off
   // to the speciality squad:
@@ -4160,16 +4534,23 @@ __gg__compare_2(cblc_field_t *left_side,
   cbl_encoding_t encoding_right = right_side->encoding;
   charmap_t *charmap_left  = __gg__get_charmap(encoding_left);
   charmap_t *charmap_right = __gg__get_charmap(encoding_right);
+  int stride = charmap_left->stride();
 
   // Figure out if we have any figurative constants
   cbl_figconst_t left_figconst  = (cbl_figconst_t)(left_attr  & FIGCONST_MASK);
   cbl_figconst_t right_figconst = (cbl_figconst_t)(right_attr & FIGCONST_MASK);
 
-  unsigned int fig_left  = 0;
-  unsigned int fig_right = 0;
+  cbl_char_t fig_left  = 0;
+  cbl_char_t fig_right = 0;
 
-  fig_left  = charmap_left->figconst_character(left_figconst);
-  fig_right = charmap_right->figconst_character(right_figconst);
+  if( left_figconst )
+    {
+    fig_left  = charmap_left->figconst_character(left_figconst);
+    }
+  if( right_figconst )
+    {
+    fig_right = charmap_right->figconst_character(right_figconst);
+    }
 
   // We have four high-level conditions to consider depending on whether
   // left and/or right are figurative constants:
@@ -4180,7 +4561,7 @@ __gg__compare_2(cblc_field_t *left_side,
   if( left_figconst && right_figconst )
     {
     // We are comparing two figurative constants
-    retval = collated(fig_left) - collated(fig_right);
+    retval = uber_compare(fig_left, fig_right);
     compare = true;
     goto fixup_retval;
     }
@@ -4211,14 +4592,15 @@ __gg__compare_2(cblc_field_t *left_side,
       case quote_value_e:
       case space_value_e:
         retval = 0;
-        for(size_t i=0; i<left_length; i++)
+        for(size_t i=0; i<left_length; i+=stride)
           {
-          // The right side is a figurative constant.  Compare the left side
-          // to the appropriate constant.
-          unsigned int fig_of_left =
+          // The right side is a figurative constant.  Compare data from the
+          // left side to the figurative constant from the right converted to
+          // the left encoding:
+          cbl_char_t fig_of_right =
                              charmap_left->figconst_character(right_figconst);
-          retval = collated((unsigned int)left_location[i])
-                   - collated(fig_of_left);
+          cbl_char_t left_ch = charmap_left->getch(left_location, i);
+          retval = uber_compare(left_ch, fig_of_right);
           if( retval )
             {
             break;
@@ -4271,12 +4653,12 @@ __gg__compare_2(cblc_field_t *left_side,
           default:
             // We are comparing a alphanumeric string to ZEROES
             retval = 0;
-            for(size_t i=0; i<left_length; i++)
+            for(size_t i=0; i<left_length; i+=stride)
               {
-              unsigned int fig_of_left =
+              unsigned int fig_of_right =
                              charmap_left->figconst_character(right_figconst);
-              retval = collated((unsigned int)left_location[i])
-                       - collated(fig_of_left);
+              cbl_char_t ch_left = charmap_left->getch(left_location, i);
+              retval = uber_compare(ch_left, fig_of_right);
               if( retval )
                 {
                 break;
@@ -4300,21 +4682,18 @@ __gg__compare_2(cblc_field_t *left_side,
     if(    local_is_alpha(left_side->type,  left_address_of)
         && local_is_alpha(right_side->type, right_address_of) )
       {
-      if( encoding_left != encoding_right )
+      if( (left_side->attr | right_side->attr) & hex_encoded_e )
         {
-        fprintf(stderr, "We don't yet know how to compare strings of different encodings\n");
-        fprintf(stderr, "Let Dubner and Lowden of cobolworx know about this\n");
-        abort();
+        encoding_left = encoding_right = iconv_CP1252_e;
         }
-
       retval = compare_strings(   reinterpret_cast<char *>(left_location),
                                   left_length,
                                   left_all,
                                   reinterpret_cast<char *>(right_location),
                                   right_length,
                                   right_all,
-                                  encoding_left
-                                  );
+                                  encoding_left,
+                                  encoding_right );
 
       compare = true;
       goto fixup_retval;
@@ -4344,39 +4723,15 @@ __gg__compare_2(cblc_field_t *left_side,
 
         if( right_side->type == FldLiteralN)
           {
+          right_value = get_float128(right_side, right_location);
           // In order to do the comparision, we need the value from the
           // literal to be the same flavor as the left side:
-          // We need to replace any commas with periods
-          static size_t size = 128;
-          static char *buffer = static_cast<char *>(malloc(size));
-          while( strlen(right_side->initial)+1 > size )
-            {
-            size *= 2;
-            buffer = static_cast<char *>(malloc(size));
-            }
-          massert(buffer);
-          strcpy(buffer, right_side->initial);
-
-          if( __gg__decimal_point == ',' )
-            {
-            // We are operating in DECIMAL IS COMMA mode, so we need to
-            // replace any commas with periods.
-            char *p = strchr(buffer, ',');
-            if(p)
-              {
-              *p = '.';
-              }
-            }
-
-          // buffer[] now contains the right-side string we want to convert
-          // to one of the floating-point types.  We want them to be the
-          // same size:
           switch(left_side->capacity)
             {
             case 4:
               {
               _Float32 left_value4  = *PTRCAST(_Float32, left_location);
-              _Float32 right_value4 = strtof(buffer, NULL);
+              _Float32 right_value4 = (_Float32)right_value;
               retval = 0;
               retval = left_value4 < right_value4 ? -1 : retval;
               retval = left_value4 > right_value4 ?  1 : retval;
@@ -4385,7 +4740,7 @@ __gg__compare_2(cblc_field_t *left_side,
             case 8:
               {
               _Float64 left_value8  = *PTRCAST(_Float64, left_location);
-              _Float64 right_value8 = strtod(buffer, NULL);
+              _Float64 right_value8 = (_Float64)right_value;
               retval = 0;
               retval = left_value8 < right_value8 ? -1 : retval;
               retval = left_value8 > right_value8 ?  1 : retval;
@@ -4396,7 +4751,7 @@ __gg__compare_2(cblc_field_t *left_side,
               //_Float128 left_value  = *(_Float128 *)left_location;
               GCOB_FP128 left_value16;
               memcpy(&left_value16, left_location, 16);
-              GCOB_FP128 right_value16 = strtofp128(buffer, NULL);
+              GCOB_FP128 right_value16 = right_value;
               retval = 0;
               retval = left_value16 < right_value16 ? -1 : retval;
               retval = left_value16 > right_value16 ?  1 : retval;
@@ -4484,13 +4839,18 @@ __gg__compare_2(cblc_field_t *left_side,
 
       if( right_refmod )
         {
+        if( (left_side->attr | right_side->attr) & hex_encoded_e )
+          {
+          encoding_left = encoding_right = iconv_CP1252_e;
+          }
         retval = compare_strings(   reinterpret_cast<char *>(left_location),
                                     left_length,
                                     left_all,
                                     reinterpret_cast<char *>(right_location),
                                     right_length,
                                     right_all,
-                                    left_side->encoding);
+                                    left_side->encoding,
+                                    right_side->encoding );
         compare = true;
         goto fixup_retval;
         }
@@ -4521,7 +4881,7 @@ __gg__compare_2(cblc_field_t *left_side,
                                                          right_location,
                                                          right_length,
                                                          0);
-
+      size_t right_string_length = strlen(right_string);
       if( encoding_formatted != encoding_left )
         {
         // The encodings are not the same.  We need to convert the right_string
@@ -4530,9 +4890,10 @@ __gg__compare_2(cblc_field_t *left_side,
         const char *converted = __gg__iconverter(encoding_formatted,
                                                  encoding_left,
                                                  right_string,
-                                                 strlen(right_string),
+                                                 right_string_length,
                                                  &outsize);
         memcpy(right_string, converted, outsize);
+        right_string_length = outsize;
         }
 
       // There is a tricky aspect to comparing an alphanumeric to
@@ -4543,30 +4904,38 @@ __gg__compare_2(cblc_field_t *left_side,
       // considered a "pseudo-move", and the rule for moving a negative
       // number to an alphanumeric is that negative signs get stripped off
 
-      if(    *left_location == charmap_left->mapped_character(ascii_plus)
-          || *left_location == charmap_left->mapped_character(ascii_minus)  )
+      cbl_char_t left_ch = charmap_left->getch(left_location, size_t(0));
+      if(    left_ch == charmap_left->mapped_character(ascii_plus)
+          || left_ch == charmap_left->mapped_character(ascii_minus)  )
         {
-        left_location += 1;
-        left_length -= 1;
+        left_location += charmap_left->stride();
+        left_length -= charmap_left->stride();
         }
 
-      const char *right_fixed;
-      if(  *right_string == charmap_right->mapped_character(ascii_plus)
-        || *right_string == charmap_right->mapped_character(ascii_minus)  )
+      char *right_fixed;
+      cbl_char_t right_ch = charmap_right->getch(right_string, size_t(0));
+      if(  right_ch == charmap_right->mapped_character(ascii_plus)
+        || right_ch == charmap_right->mapped_character(ascii_minus)  )
         {
-        right_fixed = right_string + 1;
+        right_fixed = right_string + charmap_right->stride();
+        right_string_length -= charmap_right->stride();
         }
       else
         {
         right_fixed = right_string;
         }
 
+      if( (left_side->attr | right_side->attr) & hex_encoded_e )
+        {
+        encoding_left = encoding_right = iconv_CP1252_e;
+        }
       retval = compare_strings(   reinterpret_cast<char *>(left_location),
                                   left_length,
                                   left_all,
                                   right_fixed,
-                                  strlen(right_fixed),
+                                  right_string_length,
                                   right_all,
+                                  encoding_left,
                                   encoding_left);
       compare = true;
       goto fixup_retval;
@@ -4704,7 +5073,8 @@ struct for_sort_table
 static for_sort_table sorter;
 
 static int
-compare_two_records(unsigned char *range1, unsigned char *range2)
+compare_two_records(unsigned char *range1,
+                    unsigned char *range2)
   {
   int retval = 0;
 
@@ -4724,6 +5094,7 @@ compare_two_records(unsigned char *range1, unsigned char *range2)
     field2.data = range2
                   + field2.offset
                   - sorter.base;
+    field1.encoding = field2.encoding = encoding_for_sort;
 
     // We handle descending by swapping the data sources:
     if( !sorter.ascending[i] )
@@ -4826,6 +5197,8 @@ __gg__sort_table( const cblc_field_t    *table,
     next_record += record_size;
     }
 
+  encoding_for_sort = table->encoding;
+
   // Sort it
   sort_contents(contents,
                 offsets,
@@ -4876,8 +5249,6 @@ init_var_both(cblc_field_t  *var,
               unsigned char *qual_data,
               int            flag_bits)
   {
-  //fprintf(stderr, "CALLED WITH %s 0x%x\n", var->name, flag_bits);
-
   if( flag_bits & JUST_ONCE_BIT && var->attr & initialized_e )
     {
     return;
@@ -4894,8 +5265,6 @@ init_var_both(cblc_field_t  *var,
       && var->type != FldLiteralA
       && var->type != FldLiteralN )
     {
-    //fprintf(stderr, "ABORTING on %2.2d %s %d\n", var->level, var->name, var->type);
-    //abort();
     var->data = static_cast<unsigned char *>(malloc(var->capacity));
     }
 
@@ -4915,86 +5284,7 @@ init_var_both(cblc_field_t  *var,
     initialize_program_state();
     }
 
-  char *local_initial = as_initial(var->initial);
-
-  if( var->level == LEVEL88 )
-    {
-    // We need to convert the options to the var->encoding
-
-    size_t buffer_size = 4;
-    char *buffer = static_cast<char *>(malloc(buffer_size));
-
-    size_t index = 0;
-
-    const cblc_field_t *parent = var->parent;
-    switch(parent->type)
-      {
-      case FldGroup:
-      case FldAlphanumeric:
-        {
-        char *walker = local_initial;
-        while(*walker)
-          {
-          static size_t first_size = MINIMUM_ALLOCATION_SIZE;
-          static char *first = static_cast<char *>(malloc(first_size));
-          static size_t last_size = MINIMUM_ALLOCATION_SIZE;
-          static char *last = static_cast<char *>(malloc(last_size));
-          if( strlen(walker)+1 > first_size )
-            {
-            first_size = strlen(walker)+1;
-            first = static_cast<char *>(realloc(first, first_size));
-            }
-          if( (*walker & 0xFF) == 0xFF )
-            {
-            // I don't recall what 0xFF means, and I neglected to comment it
-            // the first time through.  Probably means numerical values follow.
-            strcpy(first, walker);
-            }
-          else
-            {
-            strcpy(first, walker);
-            __gg__convert_encoding( first,
-                                    DEFAULT_SOURCE_ENCODING,
-                                    var->encoding);
-            }
-          walker += strlen(first) + 1;
-
-          if( strlen(walker)+1 > last_size )
-            {
-            last_size = strlen(walker)+1;
-            last = static_cast<char *>(realloc(last, last_size));
-            }
-          if( (*walker & 0xFF) == 0xFF )
-            {
-            strcpy(last, walker);
-            }
-          else
-            {
-           __gg__convert_encoding( last,
-                                   DEFAULT_SOURCE_ENCODING,
-                                   var->encoding);
-            }
-          walker += strlen(last) + 1;
-          while(index + strlen(first) + strlen(last) + 3 > buffer_size)
-            {
-            buffer_size *= 2;
-            buffer = static_cast<char *>(realloc(buffer, buffer_size));
-            }
-          strcpy(buffer+index, first);
-          index += strlen(first) + 1;
-          strcpy(buffer+index, last);
-          index += strlen(last) + 1;
-          }
-        buffer[index++] = 0;
-        break;
-        }
-      }
-    if( index > 0 )
-      {
-      buffer = static_cast<char *>(realloc(buffer, index));
-      local_initial = buffer;
-      }
-    }
+  const char *local_initial = as_initial(var->initial);
 
   // Next order of business: When the variable was allocated in
   // parser_symbol_add(), only LEVEL 01 variables had memory allocated.  All
@@ -5131,31 +5421,46 @@ init_var_both(cblc_field_t  *var,
     switch( var->type )
       {
       case FldGroup:
+        {
+        if( var->initial )
+          {
+          memcpy(outer_location, var->initial, var->capacity);
+          }
+        break;
+        }
+
       case FldAlphanumeric:
       case FldAlphaEdited:
       case FldNumericEdited:
       case FldLiteralA:
         {
-        // Any initialization values were converted to single-byte-coding in the
-        // right codeset during parser_symbol_add()
         if( var->initial )
           {
           memcpy(outer_location, var->initial, var->capacity);
           }
         else
           {
+          charmap_t *charmap = __gg__get_charmap(var->encoding);
           if( !defaultbyte_in_play )
             {
-            charmap_t *charmap = __gg__get_charmap(var->encoding);
-            memset(  outer_location,
-                     charmap->mapped_character(ascii_space),
-                     capacity );
+            cbl_char_t initialization_character = ascii_space;
+            if( var->attr & linkage_e && __gg__local_init != NOT_A_CHARACTER )
+              {
+              initialization_character = __gg__local_init;
+              }
+            if( !(var->attr & linkage_e) && __gg__working_init != NOT_A_CHARACTER )
+              {
+              initialization_character = __gg__working_init;
+              }
+            charmap->memset(outer_location,
+                            charmap->mapped_character(initialization_character),
+                            capacity );
             }
           else
             {
-            memset(  outer_location,
-                     defaultbyte,
-                     capacity );
+            charmap->memset(outer_location,
+                            defaultbyte,
+                            capacity );
             }
           }
         break;
@@ -5171,30 +5476,36 @@ init_var_both(cblc_field_t  *var,
           }
         else
           {
+          cbl_char_t init_zero = ascii_zero;
+          cbl_char_t init_plus = ascii_plus;
+
+          charmap_t *charmap = __gg__get_charmap(var->encoding);
           if( !defaultbyte_in_play )
             {
-            charmap_t *charmap = __gg__get_charmap(var->encoding);
-            memset(  outer_location,
-                     charmap->mapped_character(ascii_zero),
-                     capacity );
+            charmap->memset( outer_location,
+                             charmap->mapped_character(init_zero),
+                             capacity );
             if( (var->attr & signable_e) && (var->attr & separate_e) )
               {
               if( var->attr & leading_e )
                 {
-                outer_location[0] = charmap->mapped_character(ascii_plus);
+                charmap->putch(charmap->mapped_character(init_plus),
+                               outer_location,
+                               size_t(0));
                 }
               else
                 {
-                outer_location[var->capacity-1] =
-                                        charmap->mapped_character(ascii_plus);
+                charmap->putch(charmap->mapped_character(init_plus),
+                               outer_location,
+                               var->capacity-charmap->stride());
                 }
               }
             }
           else
             {
-            memset(  outer_location,
-                     defaultbyte,
-                     capacity );
+            charmap->memset(outer_location,
+                            defaultbyte,
+                            capacity );
             }
           }
         break;
@@ -5301,11 +5612,6 @@ extern "C"
 void
 __gg__initialize_variable_clean(cblc_field_t *var, int flag_bits)
   {
-//  if( var->type == FldLiteralA )
-//    {
-//    fprintf(stderr, "BAZINGA!\n");
-//    }
-
   init_var_both(  var,
                   var->data,
                   flag_bits);
@@ -5380,7 +5686,9 @@ alpha_to_alpha_move_from_location(cblc_field_t *field,
         memmove(to + (dest_length-count),
                 from,
                 count);
-        memset(to, charmap->mapped_character(ascii_space), dest_length-count);
+        charmap->memset(to,
+                        charmap->mapped_character(ascii_space),
+                        dest_length-count);
         }
       }
     else
@@ -5412,14 +5720,28 @@ alpha_to_alpha_move_from_location(cblc_field_t *field,
         memmove(to,
                 from,
                 count);
-        memset( to + count,
-                charmap->mapped_character(ascii_space),
-                dest_length-count);
+        charmap->memset(to + count,
+                        charmap->mapped_character(ascii_space),
+                        dest_length-count);
         }
       }
     }
   }
 
+extern "C"
+void *
+__gg__memdup(const void *p, size_t size)
+  {
+  void *retval = nullptr;
+  if(size)
+    {
+    retval = malloc(size);
+    massert(retval);
+    memcpy(retval, p, size);
+    }
+  return retval;
+  }
+
 static void
 alpha_to_alpha_move(cblc_field_t *dest,
                     size_t dest_offset,
@@ -5434,44 +5756,27 @@ alpha_to_alpha_move(cblc_field_t *dest,
   size_t outlength;
   if(dest->encoding == source->encoding)
     {
+    // we don't need to bother calling __gg__iconverter
     outlength = source_size;
     }
   else
     {
     // Before calling the mover, we need to convert the source to the
     // destination encoding:
-    static size_t bufsize = 0;
-    static char *buffer = nullptr;
-
-    // Supposing we might someday want to convert to UCS-4, then we need four
-    // output bytes for each input SBC character.  This is a dumb way of
-    // thinking about it.  By rights we should be calculating the worst case
-    // dest size as (source_size / min_bytes_per_source_position) times the
-    // max_bytes_per_dest_position.
-
-    // But that's work for another day.  This is harmless, if perhaps a bit
-    // wasteful of memory.
-
-    size_t needed = 4 * source_size;
-    if( needed > bufsize )
-      {
-      bufsize = needed;
-      buffer = static_cast<char *>(realloc(buffer, bufsize));
-      massert(buffer);
-      }
-
     source_location = __gg__iconverter( source->encoding,
                                         dest->encoding,
                                         source_location,
                                         source_size,
                                         &outlength);
     }
+  char *duped = static_cast<char *>(__gg__memdup(source_location, outlength));
   alpha_to_alpha_move_from_location(dest,
                                     dest_offset,
                                     dest_size,
-                                    source_location,
+                                    duped,
                                     outlength,
                                     source_move_all);
+  free(duped);
   }
 
 extern "C"
@@ -5506,8 +5811,32 @@ __gg__move( cblc_field_t        *fdest,
   __int128 value;
   int rdigits;
 
+  charmap_t *charmap = __gg__get_charmap(fdest->encoding);
+  int stride = charmap->stride();
   cbl_figconst_t source_figconst =
                         (cbl_figconst_t)(fsource->attr & FIGCONST_MASK);
+  int special_char = 0; // quiets cppcheck
+  if( source_figconst == low_value_e )
+    {
+    special_char = charmap->low_value_character();
+    }
+  else if( source_figconst == high_value_e )
+    {
+    special_char = charmap->high_value_character();
+    }
+  else if( source_figconst == quote_value_e )
+    {
+    special_char = charmap->quote_character();
+    }
+  else if( source_figconst == space_value_e )
+    {
+    special_char = charmap->mapped_character(ascii_space);
+    }
+  else if( source_figconst == zero_value_e )
+    {
+    special_char = charmap->mapped_character(ascii_zero);
+    }
+
   cbl_field_type_t dest_type   = (cbl_field_type_t)fdest->type;
   cbl_field_type_t source_type = (cbl_field_type_t)fsource->type;
 
@@ -5533,8 +5862,6 @@ __gg__move( cblc_field_t        *fdest,
         ||  fdest->type == FldFloat )
         )
     {
-    charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
-
     // Regardless of what you see below, as time went on it became clear that
     // high-value and low-value required special processing in order to cope
     // with code.  Or, at least, to cope with legacy tests.
@@ -5550,27 +5877,9 @@ __gg__move( cblc_field_t        *fdest,
      *  other than QUOTE or QUOTES, to a numeric item is an archaic feature of
      *  standard COBOL and its use should be avoided
      */
-
-    int special_char = 0; // quiets cppcheck
-    if( source_figconst == low_value_e )
-      {
-      special_char = charmap_dest->low_value_character();
-      }
-    else if( source_figconst == high_value_e )
-      {
-      special_char = charmap_dest->high_value_character();
-      }
-    else if( source_figconst == quote_value_e )
-      {
-      special_char = charmap_dest->quote_character();
-      }
-    else if( source_figconst == space_value_e )
-      {
-      special_char = charmap_dest->mapped_character(ascii_space);
-      }
-    memset( fdest->data + dest_offset,
-            special_char,
-            dest_size);
+    charmap->memset( fdest->data + dest_offset,
+                          special_char,
+                          dest_size);
     }
   else
     {
@@ -5590,6 +5899,7 @@ __gg__move( cblc_field_t        *fdest,
           case FldPacked:
           case FldNumericBin5:
           case FldGroup:
+          case FldLiteralA:
             {
             // This is a little bold, but non-alphabetics will never
             // have the rjust_e or MOVE_ALL bits on, so it's safe
@@ -5629,14 +5939,24 @@ __gg__move( cblc_field_t        *fdest,
           case FldAlphanumeric:
           case FldNumericEdited:
           case FldAlphaEdited:
+          case FldLiteralA:
             // This is an ordinary alpha-to-alpha move:
-            alpha_to_alpha_move(fdest,
-                                dest_offset,
-                                dest_size,
-                                fsource,
-                                source_offset,
-                                source_size,
-                                !!(source_flags & REFER_T_MOVE_ALL));
+            if( source_figconst )
+              {
+              charmap->memset( fdest->data + dest_offset,
+                                    special_char,
+                                    dest_size);
+              }
+            else
+              {
+              alpha_to_alpha_move(fdest,
+                                  dest_offset,
+                                  dest_size,
+                                  fsource,
+                                  source_offset,
+                                  source_size,
+                                  !!(source_flags & REFER_T_MOVE_ALL));
+              }
             break;
 
           case FldNumericDisplay:
@@ -5659,12 +5979,9 @@ __gg__move( cblc_field_t        *fdest,
               // alphanumeric.  We ignore any sign bit, and just
               // move the characters:
 
-              charmap_t *charmap = __gg__get_charmap(fdest->encoding);
-
               size_t source_digits
-                = fsource->digits
-                  + ( fsource->rdigits < 0
-                      ? -fsource->rdigits : 0) ;
+                = fsource->digits + ( fsource->rdigits < 0
+                                      ? -fsource->rdigits : 0) ;
 
               // Pick up the absolute value of the source
               value = __gg__binary_value_from_qualified_field(&rdigits,
@@ -5683,7 +6000,7 @@ __gg__move( cblc_field_t        *fdest,
 
               if( !(fdest->attr & rjust_e) )
                 {
-                min_length = std::min(  source_digits,
+                min_length = std::min(  source_digits*stride,
                                         dest_size);
                 memmove(fdest->data + dest_offset, ach, min_length);
                 if( min_length < dest_size )
@@ -5691,22 +6008,22 @@ __gg__move( cblc_field_t        *fdest,
                   // min_length is smaller than dest_length, so we
                   // have to space-fill the excess bytes in the
                   // destination:
-                  memset( fdest->data + dest_offset + min_length,
-                          charmap->mapped_character(ascii_space),
-                          dest_size - min_length );
+                  charmap->memset(fdest->data + dest_offset + min_length ,
+                                  charmap->mapped_character(ascii_space),
+                                  dest_size - min_length );
                   }
                 }
               else
                 {
                 // Destination is right-justified, so things are
                 // slightly more complex
-                if( source_digits >= dest_size )
+                if( source_digits*stride >= dest_size )
                   {
                   // We need to truncate the source data on the
                   // left:
                   memmove(
                     fdest->data + dest_offset,
-                    ach + (source_digits - dest_size),
+                    ach + (source_digits*stride - dest_size),
                     dest_size );
                   }
                 else
@@ -5714,12 +6031,13 @@ __gg__move( cblc_field_t        *fdest,
                   // We need to move the shorty source string to
                   // the right side of the destination, and space-fill
                   //  the prefix:
-                  memmove(fdest->data + dest_offset + (dest_size - source_digits),
+                  memmove(fdest->data
+                            + dest_offset + (dest_size - source_digits*stride),
                           ach,
-                          source_digits );
-                  memset( fdest->data + dest_offset,
-                          charmap->mapped_character(ascii_space),
-                          dest_size - source_digits);
+                          source_digits*stride );
+                  charmap->memset( fdest->data + dest_offset,
+                                  charmap->mapped_character(ascii_space),
+                                  dest_size - source_digits*stride);
                   }
                 }
               }
@@ -5741,7 +6059,6 @@ __gg__move( cblc_field_t        *fdest,
               }
             else
               {
-              charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
               char ach[128];
 
               // Turn the integer source into a value:
@@ -5770,33 +6087,34 @@ __gg__move( cblc_field_t        *fdest,
 
               if( fsource->attr & intermediate_e )
                 {
-                charmap_t *charmap_src = __gg__get_charmap(fsource->encoding);
                 while(source_size > 1)  // This ensures we leave one '0'
                   {
-                  if( *(pach+1) == '\0' )
+                  cbl_char_t ch = charmap->getch(pach, stride);
+                  if( ch == '\0' )
                     {
                     break;
                     }
-                  if( ((*pach)&0xFF) != charmap_src->mapped_character(ascii_0))
+                  ch = charmap->getch(pach, size_t(0));
+                  if( ch != charmap->mapped_character(ascii_0))
                     {
                     break;
                     }
-                  pach += 1;
+                  pach += stride;
                   source_size -= 1;
                   }
                 }
 
               if( !(fdest->attr & rjust_e) )
                 {
-                min_length = std::min(  source_size,
+                min_length = std::min(  source_size*stride,
                                         dest_size);
                 memmove(fdest->data+dest_offset, pach, min_length);
                 if( min_length < dest_size )
                   {
                   // min_length is smaller than dest_length, so we have to
                   // space-fill the excess bytes in the destination:
-                  memset( fdest->data+dest_offset + min_length,
-                          charmap_dest->mapped_character(ascii_space),
+                  charmap->memset( fdest->data+dest_offset + min_length,
+                          charmap->mapped_character(ascii_space),
                           dest_size - min_length );
                   }
                 }
@@ -5815,12 +6133,13 @@ __gg__move( cblc_field_t        *fdest,
                   {
                   // We need to move the shorty source string to the
                   // right side of the destination, and space-fill the prefix:
-                  memmove(fdest->data+dest_offset + (dest_size - source_size),
+                  memmove(fdest->data+dest_offset +
+                              (dest_size - source_size*stride),
                           pach,
-                          source_size );
-                  memset(fdest->data+dest_offset,
-                         charmap_dest->mapped_character(ascii_space),
-                         (dest_size - source_size));
+                          source_size*stride );
+                  charmap->memset(fdest->data+dest_offset,
+                         charmap->mapped_character(ascii_space),
+                         (dest_size - source_size*stride));
                   }
                 }
               }
@@ -5828,7 +6147,6 @@ __gg__move( cblc_field_t        *fdest,
 
           case FldIndex:
             {
-            charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
             char ach[128];
 
             // Turn the integer source into a value:
@@ -5836,8 +6154,11 @@ __gg__move( cblc_field_t        *fdest,
                                                             fsource,
                                                             source_offset,
                                                             source_size);
-            sprintf(ach, "%lu", (unsigned long)value);
-
+            // Turn the integer value into a string:
+            __gg__binary_to_string_encoded(ach,
+                                           source_size,
+                                           value,
+                                           fdest->encoding);
             char *pach = ach;
 
             if( !(fdest->attr & rjust_e) )
@@ -5849,8 +6170,8 @@ __gg__move( cblc_field_t        *fdest,
                 {
                 // min_length is smaller than dest_length, so we have to
                 // space-fill the excess bytes in the destination:
-                memset( fdest->data+dest_offset + min_length,
-                        charmap_dest->mapped_character(ascii_space),
+                charmap->memset( fdest->data+dest_offset + min_length,
+                        charmap->mapped_character(ascii_space),
                         dest_size - min_length );
                 }
               }
@@ -5872,7 +6193,7 @@ __gg__move( cblc_field_t        *fdest,
                         pach,
                         source_size );
                 memset(fdest->data+dest_offset,
-                       charmap_dest->mapped_character(ascii_space),
+                       charmap->mapped_character(ascii_space),
                        (dest_size - source_size));
                 }
               }
@@ -5888,7 +6209,6 @@ __gg__move( cblc_field_t        *fdest,
 
       case FldNumericBinary:
         {
-        charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
         switch( source_type )
           {
           case FldGroup:
@@ -5899,7 +6219,7 @@ __gg__move( cblc_field_t        *fdest,
               // min_length is smaller than dest_length, so we have to
               // space-fill the excess bytes in the destination:
               memset( fdest->data+dest_offset + min_length,
-                      charmap_dest->mapped_character(ascii_space),
+                      charmap->mapped_character(ascii_space),
                       dest_size - min_length );
               }
             fdest->attr &= ~FIGCONST_MASK;
@@ -6037,7 +6357,6 @@ __gg__move( cblc_field_t        *fdest,
         {
         // Bin5 and Index are treated with no truncation, as if they were
         // trunc_bin_e.  The other types aren't subject to truncation.
-        charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
         switch( source_type )
           {
           case FldGroup:
@@ -6048,7 +6367,7 @@ __gg__move( cblc_field_t        *fdest,
               // min_length is smaller than dest_length, so we have to
               // space-fill the excess bytes in the destination:
               memset( fdest->data+dest_offset + min_length,
-                      charmap_dest->mapped_character(ascii_space),
+                      charmap->mapped_character(ascii_space),
                       dest_size - min_length );
               }
             break;
@@ -6121,7 +6440,6 @@ __gg__move( cblc_field_t        *fdest,
 
       case FldAlphaEdited:
         {
-        charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
         switch( source_type )
           {
           case FldGroup:
@@ -6132,7 +6450,7 @@ __gg__move( cblc_field_t        *fdest,
               // min_length is smaller than dest_length, so we have to
               // space-fill the excess bytes in the destination:
               memset( fdest->data+dest_offset + min_length,
-                      charmap_dest->mapped_character(ascii_space),
+                      charmap->mapped_character(ascii_space),
                       dest_size - min_length );
               }
             break;
@@ -6176,7 +6494,7 @@ __gg__move( cblc_field_t        *fdest,
                                         display_string_length);
 
             int fc_char = __gg__fc_char(fsource);
-            if( fc_char > -1 )
+            if( fc_char != NOT_A_CHARACTER )
               {
               memset(display_string, fc_char, dest_size);
               __gg__convert_encoding_length(display_string,
@@ -6213,6 +6531,7 @@ __gg__move( cblc_field_t        *fdest,
         switch( source_type )
           {
           case FldAlphanumeric:
+          case FldGroup:
             {
             // Converting alphanumeric to float means first converting to
             // ascii:
@@ -6539,6 +6858,8 @@ __gg__sort_workfile(cblc_file_t    *workfile,
                     size_t         *ascending,
                     int             duplicates)
   {
+  encoding_for_sort = workfile->encoding;
+
   // We are going to read the records of workfile into memory.  We keep offsets
   // into the memory buffer, and then we'll sort those offsets according to the
   // things they point to.
@@ -6553,6 +6874,8 @@ __gg__sort_workfile(cblc_file_t    *workfile,
   size_t bytes_read;
   size_t bytes_to_write;
 
+  const charmap_t *charmap = __gg__get_charmap(workfile->encoding);
+
   sv_suppress_eof_ec = true;
   for(;;)
     {
@@ -6561,9 +6884,10 @@ __gg__sort_workfile(cblc_file_t    *workfile,
     if( workfile->record_length )
       {
       int rdigits;
-      bytes_read = (size_t) __gg__binary_value_from_field(
-                                              &rdigits,
-                                              workfile->record_length);
+      // The record length is reported in character positions:
+      bytes_read = charmap->stride() * (size_t) __gg__binary_value_from_field(
+                                                  &rdigits,
+                                                  workfile->record_length);
       }
     else
       {
@@ -6616,8 +6940,10 @@ __gg__sort_workfile(cblc_file_t    *workfile,
     if(    workfile->record_area_min != workfile->record_area_max
         && workfile->record_length )
       {
+      // Set the number of bytes to write, remembering that record_length is
+      // in characters, not bytes:
       __gg__int128_to_field(workfile->record_length,
-                            bytes_to_write,
+                            bytes_to_write/charmap->stride(),
                             0,
                             truncation_e,
                             NULL);
@@ -6650,6 +6976,8 @@ __gg__merge_files( cblc_file_t   *workfile,
 
   // Then we will begin.
 
+  encoding_for_sort = workfile->encoding;
+
   sorter.nkeys     = nkeys;
   sorter.keys      = keys;
   sorter.ascending = ascending;
@@ -6720,7 +7048,6 @@ __gg__merge_files( cblc_file_t   *workfile,
         {
         // The prior winner is bigger than the current winner, which means that
         // the input files were not in order.  This is a run-time error.
-
         exception_raise(ec_sort_merge_sequence_e);
         abort();
         }
@@ -6759,6 +7086,9 @@ __gg__merge_files( cblc_file_t   *workfile,
   free(prior_winner);
   }
 
+typedef std::vector<cbl_char_t>::const_iterator char_it_c ;
+typedef std::vector<cbl_char_t>::iterator       char_it   ;
+
 static const char *
 funky_find( const char *piece,
             const char *piece_end,
@@ -6787,6 +7117,42 @@ funky_find( const char *piece,
   return retval;
   }
 
+static char_it_c
+funky_find_wide( char_it_c needle,
+                 char_it_c needle_end,    // Actually end+1
+                 char_it_c haystack,
+                 char_it_c haystack_end,  // Actually end+1
+                 char_it_c notfound)
+  {
+  // We are looking for the needle in the haystack
+
+  char_it_c retval = notfound;
+
+  size_t length_of_piece = needle_end - needle;
+  if(length_of_piece == 0)
+    {
+    __gg__abort("funky_find_wide() length_of_piece shouldn't be zero");
+    }
+
+  haystack_end -= length_of_piece;
+
+  while( haystack <= haystack_end )
+    {
+    // Compare the memory at needle to the memory at haystack
+    if( memcmp( &(*needle),
+                &(*haystack),
+                length_of_piece*sizeof(cbl_char_t)) == 0 )
+      {
+      // They are the same; return where needle was found
+      retval = haystack;
+      break;
+      }
+    // Not found; move to the next location in the haystach
+    haystack += 1;
+    }
+  return retval;
+  }
+
 static const char *
 funky_find_backward(const char *piece,
                     const char *piece_end,
@@ -6815,15 +7181,58 @@ funky_find_backward(const char *piece,
   return retval;
   }
 
+static char_it_c
+funky_find_wide_backward( char_it_c needle,
+                 char_it_c needle_end,    // Actually end+1
+                 char_it_c haystack,
+                 char_it_c haystack_end,  // Actually end+1
+                 char_it_c notfound)
+  {
+  // We are looking for the needle in the haystack
+
+  char_it_c retval = notfound;
+
+  size_t length_of_piece = needle_end - needle;
+  if(length_of_piece == 0)
+    {
+    __gg__abort("funky_find_wide_backward() length_of_piece shouldn't be zero");
+    }
+
+  haystack_end -= length_of_piece;
+
+  while( haystack <= haystack_end )
+    {
+    if( memcmp( &(*needle),
+                &(*haystack_end),
+                length_of_piece*sizeof(cbl_char_t)) == 0 )
+      {
+      // They are the same; return where needle was found
+      retval = haystack_end;
+      break;
+      }
+    // Not found; move to the next location in the haystack
+    haystack_end -= 1;
+    }
+  return retval;
+  }
+
 typedef struct normalized_operand
   {
   // These are the characters of the string.  When the field is NumericDisplay
   // any leading or trailing +/- characters are removed, and any embedded
   // minus bits are removed.
+
+  // In order for INSPECT to handle things like UTF-8, which often has
+  // multi-byte codepoints, and UTF-16, which sometimes has multi-pair
+  // codepoints we are going to convert everything to UTF-32 for internal
+  // calculations and searches.
   std::string the_characters;
-  size_t offset;  // Usually zero.  One when there is a leading sign.
+  std::vector<cbl_char_t>the_vectorxxxx;
+
+  // offset and length are maintained in characters, not bytes
+  size_t offset;  // Usually zero.  Increased by one for leading separate sign.
   size_t length;  // Usually the same as the original.  But it is one less
-  //              // than the original when there is a trailing sign.
+  //              // than the original when there is a trailing separate sign.
   } normalized_operand;
 
 typedef struct comparand
@@ -6834,6 +7243,8 @@ typedef struct comparand
   normalized_operand identifier_5; // The replacement, for FORMAT 2
   const char *alpha; // The start location within normalized_id_1
   const char *omega; // The end+1 location within normalized_id_1
+  char_it_c     alpha_it;   // The start location within normalized_id_1
+  char_it_c     omega_it;   // The end+1 location within normalized_id_1
   size_t leading_count;
   bool leading;
   bool first;
@@ -6857,6 +7268,9 @@ normalize_id( const cblc_field_t *field,
 
   if( field )
     {
+    charmap_t *charmap = __gg__get_charmap(encoding);
+
+    // This is the old-style byte-based assumption
     const unsigned char *data = field->data + field_o;
     cbl_figconst_t figconst
       = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
@@ -6879,11 +7293,11 @@ normalize_id( const cblc_field_t *field,
           retval.offset = 1;
           }
         }
-      for( size_t i=retval.offset; i<retval.length; i++ )
+      for( size_t i=retval.offset; i<retval.length; i+=1 )
         {
-        charmap_t *charmap = __gg__get_charmap(field->encoding);
         // Because we are dealing with a NumericDisplay that might have
-        // the minus bit turned on, we need to mask it off
+        // the minus bit turned on, we will to mask it off as we copy the
+        // input characters over to retval:
         retval.the_characters += charmap->set_digit_negative(data[i], false);
         }
       }
@@ -6892,16 +7306,15 @@ normalize_id( const cblc_field_t *field,
       // We are set up to create the_characters;
       if( figconst == normal_value_e )
         {
-        for( size_t i=retval.offset; i<retval.length; i++ )
+        for( size_t i=retval.offset; i<retval.length; i+=1 )
           {
           retval.the_characters += data[i];
           }
         }
       else
         {
-        charmap_t *charmap = __gg__get_charmap(encoding);
         char ch =  charmap->figconst_character(figconst);
-        for( size_t i=retval.offset; i<retval.length; i++ )
+        for( size_t i=retval.offset; i<retval.length; i+=1 )
           {
           retval.the_characters += ch;
           }
@@ -6914,6 +7327,112 @@ normalize_id( const cblc_field_t *field,
     retval.offset = 0;
     retval.length = 0;
     }
+
+  if( field )
+    {
+    cbl_encoding_t source_encoding = field->encoding;
+    const charmap_t *charmap_source = __gg__get_charmap(source_encoding);
+    charmap_t *charmap = __gg__get_charmap(encoding);
+    int stride = charmap->stride();
+
+    const unsigned char *data = field->data + field_o;
+    cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+    if( figconst == normal_value_e )
+      {
+      retval.offset = 0;
+      retval.length = field_s / stride;
+
+      if( field->type == FldNumericDisplay )
+        {
+        // The value is NumericDisplay, so we might need to adjust the offset
+        // and length:
+        if( field->attr & separate_e )
+          {
+          // Because the sign is a separate plus or minus, the length
+          // gets reduced by one:
+          retval.length = field_s - 1;
+          if( field->attr & leading_e )
+            {
+            // Because the sign character is LEADING, we increase the
+            // offset by one
+            retval.offset = 1;
+            }
+          }
+        }
+      // We are ready to convert from the input to UTF32
+      size_t converted_characters;
+      const char *converted = __gg__iconverter(source_encoding,
+                                               DEFAULT_32_ENCODING,
+                                               data+retval.offset * stride,
+                                               retval.length * stride,
+                                               &converted_characters);
+      // We are ready to copy the characters over:
+      for( size_t i=0; i<converted_characters; i+=width_of_utf32 )
+        {
+        // Because we are dealing with a NumericDisplay that might have
+        // the minus bit turned on, we will to mask it off as we copy the
+        // input characters over to retval:
+        cbl_char_t ch = charmap->getch(converted, i);
+        if( field->type == FldNumericDisplay )
+          {
+          if( charmap_source->is_like_ebcdic() )
+            {
+            // In EBCDIC, a flagged negative digit 0xF0 through 0xF9 becomes
+            // 0xD0 through 0xD9.  Those represent the characters
+            // "}JKLMNOPQR", which, now that we are in UTF32 space, don't have
+            // the right bit pattern to be fixed with set_digit_negative().
+            // So, we fix it separately with this table:  Note that location
+            // 0x7D, which is ASCII '{', becomes 0x30 '0'.  See also that
+            // locations 0x4A through 0x52 become 0x31 through 0x39.
+            static const uint8_t fixit[256] =
+              {
+              0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+              0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x81, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+              0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x82, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+              0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x83, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+              0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x84, 0x49, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
+              0x37, 0x38, 0x39, 0x53, 0x54, 0x55, 0x56, 0x57, 0x85, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+              0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x86, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+              0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x87, 0x79, 0x7a, 0x7b, 0x7c, 0x30, 0x7e, 0x7f,
+              0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+              0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x89, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+              0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0x8a, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+              0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0x8b, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+              0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0x8c, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+              0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0x8d, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+              0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0x8e, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+              0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x8f, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+              };
+            ch = fixit[ch & 0xFF];
+            }
+          else
+            {
+            ch = charmap->set_digit_negative(ch, false);
+            }
+          }
+        retval.the_vectorxxxx.push_back(ch);
+        }
+      }
+    else
+      {
+      // We need to fill the field with a figurative constant:
+      // We are set up to create the_characters;
+      charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+      char ch =  charmap32->figconst_character(figconst);
+      for( size_t i=retval.offset; i<retval.length; i+=1 )
+        {
+        retval.the_characters += ch;
+        retval.the_vectorxxxx.push_back(ch);
+        }
+      }
+    }
+  else
+    {
+    // There is no field, so leave the_characters empty.
+    retval.offset = 0;
+    retval.length = 0;
+    }
+
   return retval;
   }
 
@@ -6921,12 +7440,22 @@ static void
 match_lengths(      normalized_operand &id_target,
                     const normalized_operand &id_source)
   {
+  // This routine gets called when id_source is a figurative constant and
+  // we need the target to be the same length as the source
+
   char ch = id_target.the_characters[0];
   id_target.the_characters.clear();
   for(size_t i=0; i<id_source.length; i++)
     {
     id_target.the_characters += ch;
     }
+
+  cbl_char_t wch = id_target.the_vectorxxxx[0];
+  id_target.the_vectorxxxx.clear();
+  for(size_t i=0; i<id_source.length; i++)
+    {
+    id_target.the_vectorxxxx.push_back(wch);
+    }
   id_target.length = id_source.length;
   }
 
@@ -6934,7 +7463,10 @@ static void
 the_alpha_and_omega(const normalized_operand &id_before,
                     const normalized_operand &id_after,
                     const char *          &alpha,
-                    const char *          &omega)
+                    const char *          &omega,
+                    char_it_c             &alpha_it,
+                    char_it_c             &omega_it,
+                    char_it_c              notfound)
   {
   /*  The 2023 ISO description of the AFTER and BEFORE phrases of the INSPECT
       statement is, in a word, garbled.
@@ -6983,6 +7515,18 @@ the_alpha_and_omega(const normalized_operand &id_before,
       omega = found;
       // If not found, we just leave omega alone.
       }
+
+    char_it_c omega_found = funky_find_wide(id_before.the_vectorxxxx.begin(),
+                                            id_before.the_vectorxxxx.end(),
+                                            alpha_it,
+                                            omega_it,
+                                            notfound );
+    if( omega_found != notfound )
+      {
+      // We found id_before within alpha/omega, so reduce omega
+      // to the found location.
+      omega_it = omega_found;
+      }
     }
 
   if( id_after.length )
@@ -6997,25 +7541,48 @@ the_alpha_and_omega(const normalized_operand &id_before,
       {
       // We found id_after in the alpha/omega segment.  We update alpha
       // be the character after the id_after substring.
-      alpha = found + (end-start);
+      alpha = found + (end-start);
+      }
+    else
+      {
+      // We didn't find the id_after string, so we set the alpha to be
+      // omega.  That means that no tally or replace operation will take
+      // because no characters will qualify.
+      alpha = omega;
+      }
+
+    char_it_c omega_found = funky_find_wide(id_after.the_vectorxxxx.begin(),
+                                            id_after.the_vectorxxxx.end(),
+                                            alpha_it,
+                                            omega_it,
+                                            notfound );
+    if( omega_found != notfound)
+      {
+      // We found id_after in the alpha/omega segment.  We update alpha
+      // be the character after the id_after substring.
+      alpha_it = omega_found + (end-start);
       }
     else
       {
       // We didn't find the id_after string, so we set the alpha to be
       // omega.  That means that no tally or replace operation will take
       // because no characters will qualify.
-      alpha = omega;
+      alpha_it = omega_it;
       }
     }
+
   }
 
 static void
 the_alpha_and_omega_backward( const normalized_operand &id_before,
                               const normalized_operand &id_after,
                               const char *          &alpha,
-                              const char *          &omega)
+                              const char *          &omega,
+                              char_it_c             &alpha_it,
+                              char_it_c             &omega_it,
+                              char_it_c              notfound)
   {
-  /*  Not unlike the_alpha_and_omega(), but for handling BACKWARD.
+  /*  Like the_alpha_and_omega(), but for handling BACKWARD.
 
       "xyzxyzBEFORExyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
                                                 ^     ^
@@ -7039,9 +7606,23 @@ the_alpha_and_omega_backward( const normalized_operand &id_before,
       {
       // We found id_before within id_1, so change alpha to the character just
       // to the right of BEFORE.  Otherwise, we will leave alpha alone, so that
-      // it stays at the beginning of id_1
+      // it stays at the beginning of id_1. That's because if you can't find
+      // id_before, it's as if there were no BEFORE phrase.
       alpha = found + id_before.length;
       }
+
+    char_it_c omega_found = funky_find_wide_backward(id_before.the_vectorxxxx.begin(),
+                                            id_before.the_vectorxxxx.end(),
+                                            alpha_it,
+                                            omega_it,
+                                            notfound );
+    if( omega_found != notfound )
+      {
+      // We found id_before within id_1, so change alpha to the character just
+      // to the right of BEFORE.  Otherwise, we will leave alpha alone, so that
+      // it stays at the beginning of id_1
+      alpha_it = omega_found + id_before.length;
+      }
     }
 
   if( id_after.length )
@@ -7061,7 +7642,25 @@ the_alpha_and_omega_backward( const normalized_operand &id_before,
       {
       // If the AFTER isn't found, we need to adjust things so that nothing
       // happens.
-      omega = id_1;
+      omega = alpha;
+      }
+
+    char_it_c omega_found = funky_find_wide_backward(id_after.the_vectorxxxx.begin(),
+                                            id_after.the_vectorxxxx.end(),
+                                            alpha_it,
+                                            omega_it,
+                                            notfound );
+    if( omega_found != notfound)
+      {
+      // We found id_after in id_1.  We update omega to be
+      // at that location.
+      omega_it = omega_found;
+      }
+    else
+      {
+      // If the AFTER isn't found, we need to adjust things so that nothing
+      // happens.
+      omega_it = alpha_it;
       }
     }
   }
@@ -7143,10 +7742,17 @@ inspect_backward_format_1(const size_t integers[])
           next_comparand.omega
             = next_comparand.alpha + normalized_id_1.length;
 
-          the_alpha_and_omega_backward( normalized_id_4_before,
-                                        normalized_id_4_after,
-                                        next_comparand.alpha,
-                                        next_comparand.omega);
+          next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+          next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+          the_alpha_and_omega_backward(normalized_id_4_before,
+                              normalized_id_4_after,
+                              next_comparand.alpha,
+                              next_comparand.omega,
+                              next_comparand.alpha_it,
+                              next_comparand.omega_it,
+                              normalized_id_1.the_vectorxxxx.end());
+
           comparands.push_back(next_comparand);
           break;
           }
@@ -7193,10 +7799,17 @@ inspect_backward_format_1(const size_t integers[])
             normalized_operand normalized_id_4_after
               = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
 
-            the_alpha_and_omega_backward( normalized_id_4_before,
-                                          normalized_id_4_after,
-                                          next_comparand.alpha,
-                                          next_comparand.omega);
+            next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+            next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+            the_alpha_and_omega_backward(normalized_id_4_before,
+                                normalized_id_4_after,
+                                next_comparand.alpha,
+                                next_comparand.omega,
+                                next_comparand.alpha_it,
+                                next_comparand.omega_it,
+                                normalized_id_1.the_vectorxxxx.end());
+
             next_comparand.leading = true;
             next_comparand.leading_count = 0;
             comparands.push_back(next_comparand);
@@ -7212,32 +7825,35 @@ inspect_backward_format_1(const size_t integers[])
   // We are now set up to accomplish the data flow described
   // in the language specification.  We loop through the
   // the character positions in normalized_id_1:
-  const char *leftmost  = normalized_id_1.the_characters.c_str();
-  const char *rightmost = leftmost + normalized_id_1.length;
-  const char *the_end_of_the_world = rightmost;
+  char_it_c leftmost  = normalized_id_1.the_vectorxxxx.begin();
+  char_it_c rightmost = leftmost + normalized_id_1.length;
+  char_it_c the_end_of_the_world = rightmost;
 
   while( leftmost < rightmost )
     {
+    size_t rightmost_delta = 0;
     rightmost -= 1;
     // We look at the rightmost position.  If that position is within the
     // alpha-to-omega qualified range, we check all possible matches:
 
     for(size_t k=0; k<comparands.size(); k++)
       {
-      if( rightmost < comparands[k].alpha )
+      if( rightmost < comparands[k].alpha_it )
         {
         // This can't be a match, because rightmost is
         // to the left of the comparand's alpha.
         continue;
         }
-      if( rightmost + comparands[k].identifier_3.length > comparands[k].omega )
+      if( rightmost + comparands[k].identifier_3.length >
+                                                       comparands[k].omega_it )
         {
         // This can't be a match, because the rightmost
         // character of the comparand falls to the right
         // of the comparand's omega
         continue;
         }
-      if( rightmost + comparands[k].identifier_3.length > the_end_of_the_world )
+      if( rightmost + comparands[k].identifier_3.length >
+                                                        the_end_of_the_world )
         {
         // This can't be a match, because the rightmost character of the
         // comparand falls past the new edge of id_1 established by a prior
@@ -7253,7 +7869,7 @@ inspect_backward_format_1(const size_t integers[])
         {
         for(size_t m=0; m<comparands[k].identifier_3.length; m++)
           {
-          if( comparands[k].identifier_3.the_characters[m] != rightmost[m] )
+          if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
             {
             possible_match = false;
             break;
@@ -7295,12 +7911,15 @@ inspect_backward_format_1(const size_t integers[])
             if( comparands[k].leading )
               {
               if( rightmost + comparands[k].identifier_3.length
-                                                        == comparands[k].omega)
+                                                     == comparands[k].omega_it)
                 {
                 // This means that the match here is just the latest of a
                 // string of LEADING matches that started at .omega
                 comparands[k].leading_count += 1;
                 match = true;
+                comparands[k].omega_it -= comparands[k].identifier_3.length;
+                the_end_of_the_world = rightmost;
+                rightmost_delta = comparands[k].identifier_3.length-1;
                 }
               }
             break;
@@ -7314,7 +7933,7 @@ inspect_backward_format_1(const size_t integers[])
             // all of the possible matches from here leftward to the alpha have
             // to be true as well:
 
-            if( (rightmost - comparands[k].alpha )
+            if( (rightmost - comparands[k].alpha_it )
                     % comparands[k].identifier_3.length == 0 )
               {
               // The remaining number of characters is correct for a match.
@@ -7322,13 +7941,13 @@ inspect_backward_format_1(const size_t integers[])
 
               // Assume a match until we learn otherwise:
               match = true;
-              const char *local_left = rightmost;
+              char_it_c local_left = rightmost;
               local_left -= comparands[k].identifier_3.length;
-              while( local_left >= comparands[k].alpha )
+              while( local_left >= comparands[k].alpha_it )
                 {
                 for(size_t m=0; m<comparands[k].identifier_3.length; m++)
                   {
-                  if( comparands[k].identifier_3.the_characters[m]
+                  if( comparands[k].identifier_3.the_vectorxxxx[m]
                       != local_left[m] )
                     {
                     // We have a mismatched character, so no trailing match is
@@ -7350,11 +7969,13 @@ inspect_backward_format_1(const size_t integers[])
           // Bump the result counter
           id_2_results[comparands[k].id_2_index].result += 1;
 
-          // Because we are scanning from right to left, we have to drag
-          // the goalpost along with us to ensure that following
-          // comparisions don't spill over into the characters we just matched.
+          // We have a match here at rightmost, so we need to set the end of
+          // the world here
           the_end_of_the_world = rightmost;
 
+          // Adjust rightmost by the additional characters in a BACKWARD
+          // LEADING search:
+          rightmost -= rightmost_delta;
           break;
           }
         }
@@ -7423,7 +8044,7 @@ __gg__inspect_format_1(int backward, size_t integers[])
   cblc_index += 1;
   // normalize it, according to the language specification.
   normalized_operand normalized_id_1
-                                    = normalize_id(id1, id1_o, id1_s, id1->encoding);
+                             = normalize_id(id1, id1_o, id1_s, id1->encoding);
 
   std::vector<comparand> comparands;
 
@@ -7480,10 +8101,17 @@ __gg__inspect_format_1(int backward, size_t integers[])
           next_comparand.omega
             = next_comparand.alpha + normalized_id_1.length;
 
+          next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+          next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
           the_alpha_and_omega(normalized_id_4_before,
                               normalized_id_4_after,
                               next_comparand.alpha,
-                              next_comparand.omega);
+                              next_comparand.omega,
+                              next_comparand.alpha_it,
+                              next_comparand.omega_it,
+                              normalized_id_1.the_vectorxxxx.end());
+
           comparands.push_back(next_comparand);
           break;
           }
@@ -7527,6 +8155,9 @@ __gg__inspect_format_1(int backward, size_t integers[])
             next_comparand.omega
               = next_comparand.alpha + normalized_id_1.length;
 
+            next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+            next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
             normalized_operand normalized_id_4_before
               = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
 
@@ -7536,7 +8167,11 @@ __gg__inspect_format_1(int backward, size_t integers[])
             the_alpha_and_omega(normalized_id_4_before,
                                 normalized_id_4_after,
                                 next_comparand.alpha,
-                                next_comparand.omega);
+                                next_comparand.omega,
+                                next_comparand.alpha_it,
+                                next_comparand.omega_it,
+                                normalized_id_1.the_vectorxxxx.end());
+
             next_comparand.leading = true;
             next_comparand.leading_count = 0;
             comparands.push_back(next_comparand);
@@ -7552,8 +8187,8 @@ __gg__inspect_format_1(int backward, size_t integers[])
   // We are now set up to accomplish the data flow described
   // in the language specification.  We loop through the
   // the character positions in normalized_id_1:
-  const char *leftmost  = normalized_id_1.the_characters.c_str();
-  const char *rightmost = leftmost + normalized_id_1.length;
+  char_it_c leftmost  = normalized_id_1.the_vectorxxxx.begin();
+  char_it_c rightmost = leftmost + normalized_id_1.length;
 
   while( leftmost < rightmost )
     {
@@ -7562,13 +8197,13 @@ __gg__inspect_format_1(int backward, size_t integers[])
 
     for(size_t k=0; k<comparands.size(); k++)
       {
-      if( leftmost < comparands[k].alpha )
+      if( leftmost < comparands[k].alpha_it )
         {
         // This can't be a match, because leftmost is
         // to the left of the comparand's alpha.
         continue;
         }
-      if( leftmost + comparands[k].identifier_3.length > comparands[k].omega )
+      if( leftmost + comparands[k].identifier_3.length > comparands[k].omega_it )
         {
         // This can't be a match, because the rightmost
         // character of the comparand falls to the right
@@ -7584,7 +8219,7 @@ __gg__inspect_format_1(int backward, size_t integers[])
         {
         for(size_t m=0; m<comparands[k].identifier_3.length; m++)
           {
-          if( comparands[k].identifier_3.the_characters[m] != leftmost[m] )
+          if( comparands[k].identifier_3.the_vectorxxxx[m] != leftmost[m] )
             {
             possible_match = false;
             break;
@@ -7630,10 +8265,11 @@ __gg__inspect_format_1(int backward, size_t integers[])
             //  2) leftmost / (length_of_comparand ) = current_count
             //
             // I get chills every time I look at that.
+
             if( comparands[k].leading )
               {
               // So far, so good.
-              size_t count = (leftmost - comparands[k].alpha)
+              size_t count = ((leftmost - comparands[k].alpha_it))
                               / comparands[k].identifier_3.length;
               if( count == comparands[k].leading_count )
                 {
@@ -7654,7 +8290,7 @@ __gg__inspect_format_1(int backward, size_t integers[])
             // all of the possible matches from here to the omega have to be
             // true as well:
 
-            if( (comparands[k].omega-leftmost)
+            if( (comparands[k].omega_it-leftmost)
                     % comparands[k].identifier_3.length == 0 )
               {
               // The remaining number of characters is correct for a match.
@@ -7662,13 +8298,13 @@ __gg__inspect_format_1(int backward, size_t integers[])
 
               // Assume a match until we learn otherwise:
               match = true;
-              const char *local_left = leftmost;
+              char_it_c local_left = leftmost;
               local_left += comparands[k].identifier_3.length;
-              while( local_left < comparands[k].omega )
+              while( match && local_left < comparands[k].omega_it )
                 {
                 for(size_t m=0; m<comparands[k].identifier_3.length; m++)
                   {
-                  if( comparands[k].identifier_3.the_characters[m]
+                  if( comparands[k].identifier_3.the_vectorxxxx[m]
                       != local_left[m] )
                     {
                     // We have a mismatched character, so no trailing match is
@@ -7713,7 +8349,6 @@ __gg__inspect_format_1(int backward, size_t integers[])
 
   // Add our results to the identifier_2 values:
 
-
   for(size_t i = 0; i<id_2_results.size(); i++)
     {
     int rdigits;
@@ -7809,10 +8444,18 @@ inspect_backward_format_2(const size_t integers[])
         next_comparand.omega
           = next_comparand.alpha + normalized_id_1.length;
 
-        the_alpha_and_omega_backward( normalized_id_4_before,
-                                      normalized_id_4_after,
-                                      next_comparand.alpha,
-                                      next_comparand.omega);
+        next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+        next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+        the_alpha_and_omega_backward(normalized_id_4_before,
+                            normalized_id_4_after,
+                            next_comparand.alpha,
+                            next_comparand.omega,
+                            next_comparand.alpha_it,
+                            next_comparand.omega_it,
+                            normalized_id_1.the_vectorxxxx.end());
+
+
         comparands.push_back(next_comparand);
         break;
         }
@@ -7877,10 +8520,17 @@ inspect_backward_format_2(const size_t integers[])
           normalized_operand normalized_id_4_after
             = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
 
-          the_alpha_and_omega_backward( normalized_id_4_before,
-                                        normalized_id_4_after,
-                                        next_comparand.alpha,
-                                        next_comparand.omega);
+          next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+          next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+          the_alpha_and_omega_backward(normalized_id_4_before,
+                              normalized_id_4_after,
+                              next_comparand.alpha,
+                              next_comparand.omega,
+                              next_comparand.alpha_it,
+                              next_comparand.omega_it,
+                              normalized_id_1.the_vectorxxxx.end());
+
           next_comparand.leading = true;
           next_comparand.leading_count = 0;
           next_comparand.first   = true;
@@ -7890,25 +8540,29 @@ inspect_backward_format_2(const size_t integers[])
       }
     }
 
-  const char *leftmost  = normalized_id_1.the_characters.c_str();
-  const char *rightmost = leftmost + normalized_id_1.length;
-  const char *the_end_of_the_world = rightmost;
+  // We can now look through normalized_id_1 and replace characters:
+
+  char_it_c leftmost  = normalized_id_1.the_vectorxxxx.begin();
+  char_it_c rightmost = leftmost + normalized_id_1.length;
+  char_it_c the_end_of_the_world = rightmost;
 
   while( leftmost < rightmost )
     {
+    size_t rightmost_delta = 0;
+
     rightmost -= 1;
     // We look at the rightmost position.  If that position is within the
     // alpha-to-omega qualified range, we check all possible matches:
 
     for(size_t k=0; k<comparands.size(); k++)
       {
-      if( rightmost < comparands[k].alpha )
+      if( rightmost < comparands[k].alpha_it )
         {
         // This can't be a match, because rightmost is
         // to the left of the comparand's alpha.
         continue;
         }
-      if( rightmost + comparands[k].identifier_3.length > comparands[k].omega )
+      if( rightmost + comparands[k].identifier_3.length > comparands[k].omega_it )
         {
         // This can't be a match, because the rightmost
         // character of the comparand falls to the right
@@ -7931,7 +8585,7 @@ inspect_backward_format_2(const size_t integers[])
         {
         for(size_t m=0; m<comparands[k].identifier_3.length; m++)
           {
-          if( comparands[k].identifier_3.the_characters[m] != rightmost[m] )
+          if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
             {
             possible_match = false;
             break;
@@ -7973,14 +8627,14 @@ inspect_backward_format_2(const size_t integers[])
             if( comparands[k].leading )
               {
               if(   rightmost
-                  + comparands[k].identifier_3.length
-                  + comparands[k].leading_count
-                    == comparands[k].omega)
+                  + comparands[k].identifier_3.length * (comparands[k].leading_count +1)
+                    == comparands[k].omega_it)
                 {
                 // This means that the match here is just the latest of a
                 // string of LEADING matches that started at .omega
                 comparands[k].leading_count += 1;
                 match = true;
+                rightmost_delta = comparands[k].identifier_3.length-1;
                 }
               }
             break;
@@ -7994,7 +8648,7 @@ inspect_backward_format_2(const size_t integers[])
             // all of the possible matches from here leftward to the alpha have
             // to be true as well:
 
-            if( (rightmost - comparands[k].alpha )
+            if( (rightmost - comparands[k].alpha_it )
                     % comparands[k].identifier_3.length == 0 )
               {
               // The remaining number of characters is correct for a match.
@@ -8002,13 +8656,13 @@ inspect_backward_format_2(const size_t integers[])
 
               // Assume a match until we learn otherwise:
               match = true;
-              const char *local_left = rightmost;
+              char_it_c local_left = rightmost;
               local_left -= comparands[k].identifier_3.length;
-              while( local_left >= comparands[k].alpha )
+              while( local_left >= comparands[k].alpha_it )
                 {
                 for(size_t m=0; m<comparands[k].identifier_3.length; m++)
                   {
-                  if( comparands[k].identifier_3.the_characters[m]
+                  if( comparands[k].identifier_3.the_vectorxxxx[m]
                       != local_left[m] )
                     {
                     // We have a mismatched character, so no trailing match is
@@ -8031,18 +8685,18 @@ inspect_backward_format_2(const size_t integers[])
           // with the characters from normalized_id_5
           //fprintf(stderr, "Rule: %ld %p %s\n", k+1, rightmost, rightmost);
 
-          size_t index = rightmost - normalized_id_1.the_characters.c_str();
+          size_t index = rightmost - normalized_id_1.the_vectorxxxx.begin();
           for( size_t l = 0;
                l < comparands[k].identifier_5.length;
                l++ )
             {
-            char ch = comparands[k].identifier_5.
-                      the_characters[l];
-            normalized_id_1.the_characters[index++] = ch;
+            cbl_char_t ch = comparands[k].identifier_5.
+                      the_vectorxxxx[l];
+            normalized_id_1.the_vectorxxxx[index++] = ch;
             }
 
           the_end_of_the_world = rightmost;
-
+          rightmost -= rightmost_delta;
           break;
           }
         }
@@ -8054,36 +8708,29 @@ inspect_backward_format_2(const size_t integers[])
     }
 
   // Here is where we take the characters from normalized_id_1 and put them
-  // back into identifier_1.  There is some special processing to make sure
-  // an embedded sign in a NumericDisplay survives the processing.
+  // back into identifier_1.
+
+  charmap_t *charmap = __gg__get_charmap(id1->encoding);
+  // Wastefully prefill id_1 with spaces in case the processing resulted in a
+  // string shorter than the original.  (There is always the possiblity that
+  // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
+  // this before calling __gg__converter, because both mapped_character and
+  // __gg__iconverter use the same static buffer.
   unsigned char *id1_data = id1->data + id1_o;
-  int index_dest = normalized_id_1.offset;
-  if( id1->type == FldNumericDisplay )
-    {
-    for(size_t i=0; i<normalized_id_1.length; i++)
-      {
-      charmap_t *charmap = __gg__get_charmap(id1->encoding);
-      id1_data[index_dest] = normalized_id_1.the_characters[i];
-      if( charmap->is_digit_negative(normalized_id_1.the_characters[i]) )
-        {
-        id1_data[index_dest]
-                  = charmap->set_digit_negative(id1_data[index_dest], true);
-        }
-      else
-        {
-        id1_data[index_dest]
-                  = charmap->set_digit_negative(id1_data[index_dest], false);
-        }
-      index_dest += 1;
-      }
-    }
-  else
-    {
-    for(size_t i=0; i<normalized_id_1.length; i++)
-      {
-      id1_data[index_dest++] = normalized_id_1.the_characters[i];
-      }
-    }
+  charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
+
+  // We've been working in UTF32; we convert back to the original id1 encoding.
+  size_t bytes_converted;
+  const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
+                                         id1->encoding,
+                                         normalized_id_1.the_vectorxxxx.data(),
+                                         normalized_id_1.length*width_of_utf32,
+                                         &bytes_converted) ;
+  // And move those characters into place in id_1:
+  memcpy(id1_data,
+         converted,
+         std::min(bytes_converted, id1_s));
+
   return;
   }
 
@@ -8106,21 +8753,10 @@ __gg__inspect_format_2(int backward, size_t integers[])
   size_t        id1_s = __gg__treeplet_1s[cblc_index];
   cblc_index += 1;
 
-#if 0
-  fprintf(stderr, "%s:%d: '%.*s' id1_o %zu, id1_s %zu\n", __func__, __LINE__, 
-          int(id1_s), (char*)id1->data, id1_o, id1_s);
-#endif
-  
   // normalize it, according to the language specification.
   normalized_operand normalized_id_1
                                    = normalize_id(id1, id1_o, id1_s, id1->encoding);
-#if 0
-  fprintf(stderr, "%s:%d: normalized_id_1 '%s' offset %zu, length %zu\n", __func__, __LINE__, 
-          normalized_id_1.the_characters.c_str(),
-          normalized_id_1.offset, 
-          normalized_id_1.length );
-#endif
-  
+
   std::vector<comparand> comparands;
 
   // Pick up the count of operations:
@@ -8171,10 +8807,16 @@ __gg__inspect_format_2(int backward, size_t integers[])
         next_comparand.omega
           = next_comparand.alpha + normalized_id_1.length;
 
+        next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+        next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
         the_alpha_and_omega(normalized_id_4_before,
                             normalized_id_4_after,
                             next_comparand.alpha,
-                            next_comparand.omega);
+                            next_comparand.omega,
+                            next_comparand.alpha_it,
+                            next_comparand.omega_it,
+                            normalized_id_1.the_vectorxxxx.end());
         comparands.push_back(next_comparand);
         break;
         }
@@ -8245,10 +8887,17 @@ __gg__inspect_format_2(int backward, size_t integers[])
           normalized_operand normalized_id_4_after
             = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
 
+          next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+          next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
           the_alpha_and_omega(normalized_id_4_before,
                               normalized_id_4_after,
                               next_comparand.alpha,
-                              next_comparand.omega);
+                              next_comparand.omega,
+                              next_comparand.alpha_it,
+                              next_comparand.omega_it,
+                              normalized_id_1.the_vectorxxxx.end());
+
           next_comparand.leading = true;
           next_comparand.leading_count = 0;
           next_comparand.first   = true;
@@ -8261,10 +8910,8 @@ __gg__inspect_format_2(int backward, size_t integers[])
   // We are now set up to accomplish the data flow described
   // in the language specification.  We loop through the
   // the character positions in normalized_id_1:
-  const char *leftmost
-    = normalized_id_1.the_characters.c_str();
-  const char *rightmost
-    = leftmost + normalized_id_1.length;
+  char_it_c leftmost  = normalized_id_1.the_vectorxxxx.begin();
+  char_it_c rightmost = leftmost + normalized_id_1.length;
 
   while( leftmost < rightmost )
     {
@@ -8273,14 +8920,14 @@ __gg__inspect_format_2(int backward, size_t integers[])
 
     for(size_t k=0; k<comparands.size(); k++)
       {
-      if( leftmost < comparands[k].alpha )
+      if( leftmost < comparands[k].alpha_it )
         {
         // This can't be a match, because leftmost is
         // to the left of the comparand's alpha.
         continue;
         }
       if( leftmost + comparands[k].identifier_3.length
-          > comparands[k].omega )
+          > comparands[k].omega_it )
         {
         // This can't be a match, because the rightmost
         // character of the comparand falls to the right
@@ -8295,7 +8942,7 @@ __gg__inspect_format_2(int backward, size_t integers[])
         {
         for(size_t m=0; m<comparands[k].identifier_3.length; m++)
           {
-          if( comparands[k].identifier_3.the_characters[m]
+          if( comparands[k].identifier_3.the_vectorxxxx[m]
               != leftmost[m] )
             {
             possible_match = false;
@@ -8346,7 +8993,7 @@ __gg__inspect_format_2(int backward, size_t integers[])
             if( comparands[k].leading )
               {
               // So far, so good.
-              size_t count = (leftmost - comparands[k].alpha)
+              size_t count = (leftmost - comparands[k].alpha_it)
                               / comparands[k].identifier_3.length;
               if( count == comparands[k].leading_count )
                 {
@@ -8367,7 +9014,7 @@ __gg__inspect_format_2(int backward, size_t integers[])
             // all of the possible matches from here to the omega have to be
             // true as well:
 
-            if( (comparands[k].omega-leftmost)
+            if( (comparands[k].omega_it-leftmost)
                     % comparands[k].identifier_3.length == 0 )
               {
               // The remaining number of characters is correct for a match.
@@ -8375,13 +9022,13 @@ __gg__inspect_format_2(int backward, size_t integers[])
 
               // Assume a match until we learn otherwise:
               match = true;
-              const char *local_left = leftmost;
+              char_it_c local_left = leftmost;
               local_left += comparands[k].identifier_3.length;
-              while( local_left < comparands[k].omega )
+              while( local_left < comparands[k].omega_it )
                 {
                 for(size_t m=0; m<comparands[k].identifier_3.length; m++)
                   {
-                  if( comparands[k].identifier_3.the_characters[m]
+                  if( comparands[k].identifier_3.the_vectorxxxx[m]
                       != local_left[m] )
                     {
                     // We have a mismatched character, so no trailing match is
@@ -8403,14 +9050,14 @@ __gg__inspect_format_2(int backward, size_t integers[])
           // with the characters from normalized_id_5
 
           size_t index = leftmost
-                         - normalized_id_1.the_characters.c_str();
+                         - normalized_id_1.the_vectorxxxx.begin();
           for( size_t l = 0;
                l < comparands[k].identifier_5.length;
                l++ )
             {
             char ch = comparands[k].identifier_5.
-                      the_characters[l];
-            normalized_id_1.the_characters[index++] = ch;
+                      the_vectorxxxx[l];
+            normalized_id_1.the_vectorxxxx[index++] = ch;
             }
           // Adjust the leftmost pointer to point to
           // the rightmost character of the matched
@@ -8430,75 +9077,102 @@ __gg__inspect_format_2(int backward, size_t integers[])
     }
 
   // Here is where we take the characters from normalized_id_1 and put them
-  // back into identifier_1.  There is some special processing to make sure
-  // an embedded sign in a NumericDisplay survives the processing.
+  // back into identifier_1.
+
+  charmap_t *charmap = __gg__get_charmap(id1->encoding);
+  // Wastefully prefill id_1 with spaces in case the processing resulted in a
+  // string shorter than the original.  (There is always the possiblity that
+  // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
+  // this before calling __gg__converter, because both mapped_character and
+  // __gg__iconverter use the same static buffer.
   unsigned char *id1_data = id1->data + id1_o;
-  int index_dest = normalized_id_1.offset;
-  if( id1->type == FldNumericDisplay )
-    {
-    for(size_t i=0; i<normalized_id_1.length; i++)
-      {
-      charmap_t *charmap = __gg__get_charmap(id1->encoding);
-      id1_data[index_dest] = normalized_id_1.the_characters[i];
-      if( charmap->is_digit_negative(normalized_id_1.the_characters[i]) )
-        {
-        id1_data[index_dest]
-                    = charmap->set_digit_negative(id1_data[index_dest], true);
-        }
-      else
-        {
-        id1_data[index_dest]
-                    = charmap->set_digit_negative(id1_data[index_dest], false);
-        }
-      index_dest += 1;
-      }
-    }
-  else
-    {
-    for(size_t i=0; i<normalized_id_1.length; i++)
-      {
-      id1_data[index_dest++] = normalized_id_1.the_characters[i];
-      }
-    }
+  charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
+
+  // We've been working in UTF32; we convert back to the original id1 encoding.
+  size_t bytes_converted;
+  const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
+                                         id1->encoding,
+                                         normalized_id_1.the_vectorxxxx.data(),
+                                         normalized_id_1.length*width_of_utf32,
+                                         &bytes_converted) ;
+  // And move those characters into place in id_1:
+  memcpy(id1_data,
+         converted,
+         std::min(bytes_converted, id1_s));
   return;
   }
 
-static char *
-normalize_for_inspect_format_4( size_t        *dest_size,
-                          const cblc_field_t  *var,
-                                size_t         var_offset,
-                                size_t         var_size,
-                                cbl_encoding_t encoding)
-  {
-  // Returns a malloced pointer; the caller needs to free it.
-  char *retval;
-  retval = static_cast<char *>(malloc(var_size+1));
+static std::u32string
+normalize_for_inspect_format_4(const cblc_field_t  *var,
+                                size_t              var_offset,
+                                size_t              var_size,
+                                cbl_encoding_t      source_encoding)
+  {
+  std::u32string retval;
   if(var)
     {
+    const charmap_t *charmap_var = __gg__get_charmap(source_encoding);
+    charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+
     cbl_figconst_t figconst =
                       static_cast<cbl_figconst_t>(var->attr & FIGCONST_MASK);
+    // We have a corner case to deal with:
+    if( strcmp(var->name, "NULLS") == 0 )
+      {
+      figconst = null_value_e;
+      }
+
     if( figconst )
       {
       // Build up an var_size array of figconst characters
-      charmap_t *charmap = __gg__get_charmap(encoding);
-      char figchar = '\0';
+      cbl_char_t figchar = '\0';
       switch( figconst )
         {
         case low_value_e   :
-          figchar = charmap->low_value_character();
+          figchar = charmap32->low_value_character();
           break;
         case zero_value_e  :
-          figchar = charmap->mapped_character(ascii_0);
+          figchar = charmap32->mapped_character(ascii_0);
           break;
         case space_value_e :
-          figchar = charmap->mapped_character(ascii_space);
+          figchar = charmap32->mapped_character(ascii_space);
           break;
         case quote_value_e :
-          figchar = charmap->quote_character();
+          figchar = charmap32->quote_character();
           break;
         case high_value_e  :
-          figchar = charmap->high_value_character();
+          {
+          if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
+            {
+            // See the comments where these constants are defined.
+            if(charmap_var->stride() == 1)
+              {
+              if(charmap_var->is_like_ebcdic())
+                {
+                // This maps back to 0xFF in CP1140
+                figchar = EBCDIC_HIGH_VALUE_32;
+                }
+              else
+                {
+                // This maps back to 0xFF in CP1252
+                figchar = ASCII_HIGH_VALUE_32;
+                }
+              }
+            else if(charmap_var->stride() == 2)
+              {
+              figchar = UTF16_HIGH_VALUE_32;
+              }
+            else
+              {
+              figchar = UTF32_HIGH_VALUE_32;
+              }
+            }
+          else
+            {
+            figchar = charmap32->mapped_character(__gg__high_value_character);
+            }
           break;
+          }
         case null_value_e:
           break;
         default:
@@ -8506,27 +9180,27 @@ normalize_for_inspect_format_4( size_t        *dest_size,
           abort();
           break;
         }
-      memset(retval, figchar, var_size);
-      retval[var_size] = '\0';
+      retval.push_back(figchar);
       }
     else
       {
-      // It's not a figurative constant, so convert var to the target encoding.
-      memcpy(retval,
-             __gg__iconverter(var->encoding,
-                              encoding,
-                              PTRCAST(char, var->data) + var_offset,
+      // It's not a figurative constant, so convert var to UTF32.
+      size_t converted_bytes;
+      const char *converted = __gg__iconverter(
+                              var->encoding,
+                              DEFAULT_32_ENCODING,
+                              var->data + var_offset,
                               var_size,
-                              dest_size),
-             var_size);
-      retval[var_size] = '\0';
+                              &converted_bytes);
+      void *duped = __gg__memdup(converted, converted_bytes);
+      for(size_t i=0; i<converted_bytes; i+=width_of_utf32)
+        {
+        cbl_char_t ch = charmap32->getch(duped, i);
+        retval.push_back(ch);
+        }
+      free(duped);
       }
     }
-  else
-    {
-    retval = nullptr;
-    *dest_size = 0;
-    }
   return retval;
   }
 
@@ -8560,9 +9234,9 @@ __gg__inspect_format_4( int backward,
   // constant.
 
   cbl_figconst_t figconst_original =
-                            static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
+                static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
   cbl_figconst_t figconst_replacement =
-                            static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
+                static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
   int figswitch = (figconst_original ? 2 : 0) + (figconst_replacement ? 1 : 0);
   switch( figswitch )
     {
@@ -8570,10 +9244,10 @@ __gg__inspect_format_4( int backward,
       // Neither are figconst; we leave the sizes alone
       break;
     case 1:
-      // Only replacement is figconst, so we make its size the same as the
-      // original.  This will cause CONVERTING "ABC" TO ZERO to be the same as
+      // Only replacement is figconst, so we make its size -1
+      // This will cause CONVERTING "ABC" TO ZERO to be the same as
       //                            CONVERTING "ABC" TO "000"
-      replacement_size = original_size;
+      replacement_size = (size_t)(-1LL);
       break;
     case 2:
       // Only original is figconst.  Set the size to one.  (This is necessary
@@ -8598,148 +9272,183 @@ __gg__inspect_format_4( int backward,
     after_size = 1;
     }
 
-  size_t psz_input_size      ;
-  size_t psz_original_size   ;
-  size_t psz_replacement_size;
-  size_t psz_after_size      ;
-  size_t psz_before_size     ;
-
   bool all = (replacement_size == (size_t)(-1LL));
   if( all )
     {
     // A replacement_size of -1 means that the statement is something like
     // INSPECT XYZ CONVERTING "abcxyz" to ALL "?"  That means replacement is
     // a single character.  We need to convert it to the target encoding.
-    replacement_size = 1;
+    const charmap_t * charmap = __gg__get_charmap(input->encoding);
+    replacement_size = charmap->stride();
     }
 
-  char *psz_input       = normalize_for_inspect_format_4(&psz_input_size      , input      , input_offset      , input_size      , input->encoding);
-  char *psz_original    = normalize_for_inspect_format_4(&psz_original_size   , original   , original_offset   , original_size   , input->encoding);
-  char *psz_replacement = normalize_for_inspect_format_4(&psz_replacement_size, replacement, replacement_offset, replacement_size, input->encoding);
-  char *psz_after       = normalize_for_inspect_format_4(&psz_after_size      , after      , after_offset      , after_size      , input->encoding);
-  char *psz_before      = normalize_for_inspect_format_4(&psz_before_size     , before     , before_offset     , before_size     , input->encoding);
+  std::u32string str_input       = normalize_for_inspect_format_4(input      , input_offset      , input_size      , input->encoding);
+  std::u32string str_original    = normalize_for_inspect_format_4(original   , original_offset   , original_size   , input->encoding);
+  std::u32string str_replacement = normalize_for_inspect_format_4(replacement, replacement_offset, replacement_size, input->encoding);
+  std::u32string str_after       = normalize_for_inspect_format_4(after      , after_offset      , after_size      , input->encoding);
+  std::u32string str_before      = normalize_for_inspect_format_4(before     , before_offset     , before_size     , input->encoding);
 
   if( all )
     {
-    // We now expand the single-byte replacement to be the same length as
+    // We now expand the single-character replacement to be the same length as
     // original.
-    psz_replacement_size = psz_original_size;
-    psz_replacement = static_cast<char *>(realloc(psz_replacement, psz_replacement_size));
-    memset(psz_replacement, psz_replacement[0], psz_replacement_size);
+    cbl_char_t ch = str_replacement[0];
+    str_replacement.clear();
+    for(size_t i=0; i<str_original.size(); i++)
+      {
+      str_replacement.push_back(ch);
+      }
     }
 
-  // Use a simple map to make this O(N), rather than an O(N-squared),
+  // Use a  map to make this O(N), rather than an O(N-squared),
   // computational complexity
-  static const unsigned char map_init[256] =
-    {
-    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
-    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
-    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
-    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
-    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
-    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
-    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
-    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
-    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
-    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
-    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
-    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
-    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
-    };
-  unsigned char map[256];
-
-  // Initialize the map to a one-to-one correspondence.
-  memcpy(map, map_init, 256);
+  std::unordered_map<cbl_char_t, cbl_char_t>map;
+  typedef std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator map_it_t ;
 
   // The rule is, if the same character appears more than once in the
   // original (which is identifier-6), then the first occurrence of the
   // matching character in replacement is used.  So, we create the map
   // backwards.  The one closest to zero will win.
-  for(size_t i=original_size-1; i<original_size; i--)
+  for(size_t i=str_original.size()-1; i<str_original.size(); i--)
     {
-    map[ (unsigned char )psz_original[i] ] = (unsigned char )psz_replacement[i];
+    map[str_original[i]] = str_replacement[i];
     }
 
-  char *pstart = NULL;
-  const char *pend = NULL;
-  if( backward )
+  size_t leftmost_i;   // Leftmost index to replace at.
+  size_t rightmost_i;  // Rightmost+1 index to replace at.
+
+  if( !backward )
     {
-    if( before_size )
+    // This is a forward conversion.  We look for the first instance
+    // of str_after from the left.  And then we look for the first instance
+    // of str_before after that.  When there is no str_before, we move the
+    // rightmost limit to the end of str_input, as if there were no BEFORE
+    // phrase:
+
+    if( str_after.empty() )
+      {
+      // There is no AFTER phrase, so we start from the left.
+      leftmost_i = 0;
+      }
+    else
       {
-      size_t nfound = std::string(psz_input).rfind(psz_before);
-      if( nfound == std::string::npos )
+      size_t nfound = str_input.find(str_after);
+      if( nfound != std::u32string::npos )
         {
-        // The BEFORE string isn't in the input, so we will scan from
-        // the leftmost character
-        pstart = psz_input;
+        // Move the left limit to one character past the found element
+        leftmost_i = nfound + str_after.size();
         }
       else
         {
-        pstart = psz_input + nfound;
-        if( !pstart )
-          {
-          pstart = psz_input;
-          }
-        pstart += before_size;
+        // We didn't find the after phrase, so we move the left limit to the
+        // end of input, which means nothing will be replaced
+        leftmost_i = str_input.size();
         }
       }
-    else
+
+    // At this point, leftmost_i has been set to something.  Look for the
+    // BEFORE phrase somewhere to the right of it:
+
+    if( str_before.empty() )
       {
-      pstart = psz_input;
+      // There is no BEFORE phrase, so set rightmost to the end of the input
+      rightmost_i = str_input.size();
       }
-
-    if( after_size )
+    else
       {
-      size_t nfound = std::string(psz_input).rfind(psz_after);
-      if( nfound == std::string::npos )
+      // Look for BEFORE to the right of leftmost_i:
+      size_t nfound = str_input.find(str_before, leftmost_i);
+      if( nfound != std::u32string::npos )
         {
-        nfound = input_size;
+        // We found the BEFORE phrase.
+        rightmost_i = nfound;
+        }
+      else
+        {
+        // We didn't find the BEFORE phrase; IOS says to treat this situation
+        // as if there were no BEFORE phrase
+        rightmost_i = str_input.size();
         }
-      pend = psz_input + nfound;
-      }
-    if( !pend )
-      {
-      pend = psz_input+input_size;
       }
     }
   else
     {
-    if( after_size )
+    // We are doing a BACKWARD conversion.  So, we look for the AFTER phrase
+    // and use that to establish the rightmost limit.  And we look for the
+    // BEFORE to the left of AFTER phrase and use that to establish the
+    // leftmost limit
+
+    if( str_after.empty() )
       {
-      pstart = strstr(psz_input, psz_after);
+      // There is no AFTER phrase, so we set the rightmost limit to the end
+      // of the input:
+      rightmost_i = str_input.size();
       }
-    if( !pstart )
+    else
       {
-      pstart = psz_input;
+      // Start from the right and look for AFTER
+      size_t nfound = str_input.rfind(str_after, str_input.size());
+      if( nfound != std::u32string::npos )
+        {
+        // We found str_after, so its location becomes rightmost
+        rightmost_i = nfound;
+        }
+      else
+        {
+        // We didn't find str_after, so we move rightmost all the way to the
+        // left, so that nothing will ever be found.
+        rightmost_i = 0;
+        }
       }
-    pstart += after_size;
-
-    if( before_size )
+    // rightmost_i has been established, so now look for BEFORE to the left
+    // of it
+    if( str_before.empty() )
       {
-      pend = strstr(psz_input, psz_before);
+      // There is no str_before, so the left limit is all the way to the left
+      leftmost_i = 0;
       }
-    if( !pend )
+    else
       {
-      pend = psz_input + input_size;
+      size_t nfound = str_input.rfind(str_before, rightmost_i);
+      if( nfound != std::u32string::npos )
+        {
+        // We found BEFORE, so we put the left limit just to the right of
+        // where we found it:
+        leftmost_i = nfound + str_before.size();
+        }
+      else
+        {
+        // Not finding the BEFORE phrase is the same as the BEFORE phrase
+        // not having been specified:
+        leftmost_i = 0;
+        }
       }
     }
-
-  while(pstart && pstart < pend)
+  // leftmost_i and rightmost_i have been established.  Do the conversion of
+  // characters inside those limits:
+  for(size_t i=leftmost_i; i<rightmost_i; i++)
     {
-    *pstart = map[(unsigned char)*pstart];
-    pstart += 1;
+    cbl_char_t ch = str_input[i];
+    map_it_t cvt = map.find(ch);
+    if( cvt != map.end() )
+      {
+      str_input[i] = cvt->second;
+      }
     }
 
-  memcpy(input->data+input_offset, psz_input, input_size);
+  // We now take the converted str_input, and put it back into id_1:
 
-  free(psz_input       );
-  free(psz_original    );
-  free(psz_replacement );
-  free(psz_after       );
-  free(psz_before      );
+  size_t bytes_converted;
+  const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
+                                           input->encoding,
+                                           str_input.data(),
+                                           str_input.size()*width_of_utf32,
+                                           &bytes_converted) ;
+
+  // And move those characters into place in input:
+  memcpy(input->data + input_offset,
+         converted,
+         std::min(bytes_converted, input_size));
   }
 
 static void
@@ -8810,7 +9519,7 @@ move_string(cblc_field_t *field,
           // Get the charmap after the move, because it can mess with the
           // static 'to' buffer.
           charmap_t *charmap = __gg__get_charmap(field->encoding);
-          memset(to, charmap->mapped_character(ascii_space), dest_length-count);
+          charmap->memset(to, charmap->mapped_character(ascii_space), dest_length-count);
           }
         else
           {
@@ -8871,16 +9580,22 @@ static char *
 brute_force_trim(char *str, cbl_encoding_t encoding)
   {
   charmap_t *charmap = __gg__get_charmap(encoding);
+  int stride = charmap->stride();
 
   char *retval = str;
-  while( *retval == charmap->mapped_character(ascii_space) )
+
+  while(   charmap->getch(retval, size_t(0))
+        == charmap->mapped_character(ascii_space) )
     {
-    retval += 1;
+    retval += stride;
     }
-  char *p = retval + strlen(retval)-1;
-  while( p > retval && *p == charmap->mapped_character(ascii_space) )
+  char *p = retval + strlen(retval)-stride;
+  while(    p > retval
+        && (   charmap->getch(p, size_t(0))
+            == charmap->mapped_character(ascii_space)) )
     {
-    *p-- = NULLCH;
+    charmap->putch(NULLCH, p, size_t(0));
+    p -= stride;
     }
   return retval;
   }
@@ -8909,46 +9624,46 @@ __gg__string(const size_t integers[])
   size_t index_cblc = 0 ;
 
   // Pick up the target
-  const cblc_field_t *tgt   = ref[index_cblc];
+  const cblc_field_t *tgt = ref[index_cblc];
 
-  // Pick up the target encoding, which we assume controls all the parameters
+  // Pick up the target encoding, which according to the ISO specification
+  // controls all the parameters.
   cbl_encoding_t tgt_encoding = tgt->encoding;
   charmap_t *charmap = __gg__get_charmap(tgt_encoding);
-
-  char figlow[2]   = {(char)__gg__low_value_character, 0x00};
-  char fighigh[2]  = {(char)__gg__high_value_character, 0x00};
-  char figzero[2]  = {(char)charmap->mapped_character(ascii_zero), 0x00};
-  char figquote[2] = {(char)charmap->mapped_character(__gg__quote_character), 0x00};
-  char figspace[2] = {(char)charmap->mapped_character(ascii_space), 0x00};
+  int stride = charmap->stride();
 
   // Pick up the rest of the parameters
   size_t tgt_o              = ref_o[index_cblc];
   size_t tgt_s              = ref_s[index_cblc];
   index_cblc += 1;
+
   char  *dest         = reinterpret_cast<char *>(tgt->data + tgt_o);
-  ssize_t dest_length = tgt_s;
+  size_t dest_length = tgt_s/stride;
 
   // Skip over the index of POINTER:
   index_cblc += 1;
 
   // Pick up the pointer, if any
-  ssize_t pointer = 0;
+  size_t pointer = 0;
+  int overflow = 0;
   if( ref[INDEX_OF_POINTER] )
     {
     int rdigits;
-    pointer = (size_t)__gg__binary_value_from_qualified_field(
+    int p  = (size_t)__gg__binary_value_from_qualified_field(
                                                     &rdigits,
                                                     ref  [INDEX_OF_POINTER],
                                                     ref_o[INDEX_OF_POINTER],
                                                     ref_s[INDEX_OF_POINTER]
                                                     );
-    pointer -= 1;
+    if( p<0 )
+      {
+      overflow = 1;
+      }
+    pointer = p - 1;
     }
 
-  int overflow = 0;
-
   // Make sure that the destination pointer is within the destination
-  if( pointer >= 0 || pointer < dest_length )
+  if( pointer < dest_length )
     {
     // We are go for looping through identifier-2 values:
 
@@ -8959,103 +9674,69 @@ __gg__string(const size_t integers[])
 
     for( size_t i=0; i<N; i++ )
       {
+      // Pick up the number of M identifier-1 values for this list of
+      // identifier-2 values:
       size_t M = integers[index_int++];
 
       // Pick up the identifier_2 DELIMITED BY value
-      const cblc_field_t *id2   = ref[index_cblc];
-      size_t              id2_o = ref_o[index_cblc];
-      size_t              id2_s = ref_s[index_cblc];
+      std::u32string str_id2 = normalize_for_inspect_format_4(
+                                                        ref[index_cblc],
+                                                        ref_o[index_cblc],
+                                                        ref_s[index_cblc],
+                                                        tgt_encoding);
       index_cblc += 1;
 
-      char *piece;
-      const char *piece_end;
-      cbl_figconst_t figconst = (cbl_figconst_t) ( id2
-                                  ? (id2->attr & FIGCONST_MASK)
-                                  : 0 );
-      switch(figconst)
-        {
-        case low_value_e:
-          piece = figlow;
-          piece_end = piece + 1;
-          break;
-        case zero_value_e:
-          piece = figzero;
-          piece_end = piece + 1;
-          break;
-        case space_value_e:
-          piece = figspace;
-          piece_end = piece + 1;
-          break;
-        case quote_value_e:
-          piece = figquote;
-          piece_end = piece + 1;
-          break;
-        case high_value_e:
-          piece = fighigh;
-          piece_end = piece + 1;
-          break;
-        default:
-          piece = id2 ? reinterpret_cast<char *>(id2->data + id2_o) : NULL;
-          piece_end = id2 ? piece + id2_s : NULL;
-          break;
-        }
-
       for(size_t j=0; j<M; j++)
         {
-        // Pick up the next identifier-1 source string:
-        const cblc_field_t *id1 = ref[index_cblc];
-        size_t id1_o = ref_o[index_cblc];
-        size_t id1_s = ref_s[index_cblc];
+        // Pick up the next id-1 source string for the current id-2 delimiter
+        std::u32string str_id1 = normalize_for_inspect_format_4(
+                                                        ref[index_cblc],
+                                                        ref_o[index_cblc],
+                                                        ref_s[index_cblc],
+                                                        tgt_encoding);
         index_cblc += 1;
 
-        const char *whole = id1 ? reinterpret_cast<char *>(id1->data + id1_o): NULL ;
-        const char *whole_end = id1 ? whole + id1_s : NULL;
-
-        // As usual, we need to cope with figurative constants:
-        figconst = (cbl_figconst_t) ( id1 ? (id1->attr & FIGCONST_MASK) : 0 );
-        switch( figconst )
+        size_t nfound;
+        if( str_id2.size() == 0 )
           {
-          case low_value_e:
-            whole = figlow;
-            whole_end = whole + 1;
-            break;
-          case zero_value_e:
-            whole = figzero;
-            whole_end = whole + 1;
-            break;
-          case space_value_e:
-            whole = figspace;
-            whole_end = whole + 1;
-            break;
-          case quote_value_e:
-            whole = figquote;
-            whole_end = whole + 1;
-            break;
-          case high_value_e:
-            whole = fighigh;
-            whole_end = whole + 1;
-            break;
-          default:
-            break;
+          // No given delimiter means DELIMITED BY SIZE
+          nfound = str_id1.size();
           }
-
-        if(piece)
+        else
           {
-          const char *found = funky_find(   piece, piece_end,
-                                            whole, whole_end);
-          if(found)
+          // We have an id2, so we look for it inside id1
+          nfound = str_id1.find(str_id2);
+          if( nfound == std::u32string::npos )
             {
-            whole_end = found;
+            nfound = str_id1.size();
             }
           }
-        while(whole < whole_end)
+
+
           {
-          if(pointer >= dest_length)
+          // We have found id2 inside id1 at location nfound.
+
+          // Convert the UTF32 to the original encoding:
+          size_t bytes_converted;
+          char *converted = __gg__miconverter(DEFAULT_32_ENCODING,
+                                              tgt_encoding,
+                                              str_id1.data(),
+                                              nfound*width_of_utf32,
+                                              &bytes_converted );
+          size_t k = 0;
+          while(k < nfound)
             {
-            overflow = 1;
-            break;
+            if( pointer >= dest_length )
+              {
+              overflow = 1;
+              break;
+              }
+            cbl_char_t ch = charmap->getch(converted, k*stride);
+            charmap->putch(ch, dest, pointer*stride);
+            k += 1;
+            pointer += 1;
             }
-          dest[pointer++] = *whole++;
+          free(converted);
           }
         if( overflow )
           {
@@ -9149,6 +9830,8 @@ display_both(cblc_field_t  *field,
                                             display_string,
                                             conversion_length,
                                             &outlength);
+  // Trim off the trailing null, if present.
+  outlength = strlen(converted);
   write(file_descriptor,
         converted,
         outlength);
@@ -9210,6 +9893,8 @@ __gg__display_string( int            file_descriptor,
                                           str,
                                           length,
                                           &outlength);
+  // Trim off trailing NUL, if present.
+  outlength = strlen(converted);
   write( file_descriptor,
          converted,
          outlength);
@@ -9924,40 +10609,6 @@ __gg__bitwise_op( cblc_field_t *tgt,
     }
   }
 
-/*
- * Because this variable is static, the contructor runs before main and is
- * guaranted to run.
- */
-static class rt_encoding_t
-  {
-  const char *ctype, *lc_ctype;
-  public:
-  rt_encoding_t() : ctype( setlocale(LC_CTYPE, "") )
-    {
-    lc_ctype =  nl_langinfo(CODESET);
-    // Let's learn what the computer is using for the console:
-    // We need to establish the codeset used by the system console:
-  __gg__console_encoding = use_locale();
-
-    if( getenv("CODESET") )
-      {
-      fprintf(stderr, "%s:%d: ctype=%s,lc_ctype=%s\n", __func__, __LINE__,
-              ctype? ctype : "error" , lc_ctype);
-      }
-    }
-  cbl_encoding_t use_locale() const
-    {
-    auto encoding = strstr(ctype, "UTF-8") ?
-      iconv_UTF_8_e : __gg__encoding_iconv_type(lc_ctype);
-    if( getenv("CODESET") )
-      {
-      fprintf(stderr, "%s:%d: console encoding is '%s'\n", __func__, __LINE__,
-              __gg__encoding_iconv_name(encoding) );
-      }
-    return encoding;
-    }
-  } rt_encoding;
-
 extern "C"
 void
 __gg__onetime_initialization( )
@@ -10003,6 +10654,7 @@ static int
 is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
   {
   charmap_t *charmap = __gg__get_charmap(field->encoding);
+  int stride = charmap->stride();
 
   int retval = 1;
   bool signable = !!(field->attr & signable_e);
@@ -10015,22 +10667,24 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
   if( leading && separate && signable )
     {
     // First character must be +/-
+    cbl_char_t ch = charmap->getch(digits, size_t(0));
     if(     digits < digits_e
-        || (   *digits != charmap->mapped_character(ascii_plus)
-            && *digits !=  charmap->mapped_character(ascii_minus)) )
+        || (   ch != charmap->mapped_character(ascii_plus)
+            && ch !=  charmap->mapped_character(ascii_minus)) )
       {
       retval = 0;
       }
-    digits += 1;
+    digits += stride;
     }
 
   if( !leading && separate && signable )
     {
     // Last character must be +/-
-    digits_e -= 1;
+    digits_e -= stride;
+    cbl_char_t ch = charmap->getch(digits_e, size_t(0));
     if(     digits < digits_e
-        || (   *digits_e != charmap->mapped_character(ascii_plus)
-            && *digits_e !=  charmap->mapped_character(ascii_minus)) )
+        || (   ch != charmap->mapped_character(ascii_plus)
+            && ch !=  charmap->mapped_character(ascii_minus)) )
       {
       retval = 0;
       }
@@ -10038,10 +10692,11 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
 
   if( leading && !separate && signable )
     {
-    // The first character is allowed to have a sign bit.
+    // The first character is allowed to have a sign bit. Let's make sure that
+    // making that first digit unsigned leaves us with zero through nine:
     if( digits < digits_e )
       {
-      unsigned char first_char = (unsigned char)*digits;
+      cbl_char_t first_char = charmap->getch(digits, size_t(0));
       first_char = charmap->set_digit_negative(first_char, false);
       if(  first_char < charmap->mapped_character(ascii_0)
         || first_char > charmap->mapped_character(ascii_9))
@@ -10058,7 +10713,7 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
     if( digits < digits_e )
       {
       digits_e -= 1;
-      unsigned char final_char = (unsigned char)*digits_e;
+      cbl_char_t final_char = charmap->getch(digits, size_t(0));
       final_char = charmap->set_digit_negative(final_char, false);
       if(   final_char<charmap->mapped_character(ascii_0)
          || final_char>charmap->mapped_character(ascii_9) )
@@ -10151,27 +10806,94 @@ is_packed_numeric(const cblc_field_t *field, size_t offset, size_t size)
       {
       retval = 0;
       break;
-      }
-    nybble += 1;
+      }
+    nybble += 1;
+    }
+  return retval;
+  }
+
+static int
+is_alpha_a_number(const cblc_field_t *field,
+                  size_t offset,
+                  size_t size)
+  {
+  charmap_t *charmap = __gg__get_charmap(field->encoding);
+  cbl_char_t mapped_0 = charmap->mapped_character(ascii_0);
+  cbl_char_t mapped_9 = charmap->mapped_character(ascii_9);
+  int retval = 1;
+  size_t i = offset;
+  while(i < size)
+    {
+    cbl_char_t ch = charmap->getch(field->data, &i);
+    if(    (ch < mapped_0)
+        || (ch > mapped_9) )
+      {
+      retval = 0;
+      break;
+      }
+    }
+  return retval;
+  }
+
+static int
+classify_numeric_type(cblc_field_t *field,
+                      size_t offset,
+                      size_t size)
+  {
+  int retval = 1;
+  switch( field->type )
+    {
+    case FldNumericEdited:
+      retval = is_numeric_edited_numeric(field, offset, size);
+      break;
+    case FldNumericDisplay:
+      retval = is_numeric_display_numeric(field, offset, size);
+      break;
+    case FldPacked:
+      retval = is_packed_numeric(field, offset, size);
+      break;
+    case FldGroup:
+    case FldAlphanumeric:
+    case FldAlphaEdited:
+      retval = is_alpha_a_number(field, offset, size);
+      break;
+
+    case FldNumericBinary:
+    case FldNumericBin5:
+      // These need to checked for fitting into field->digits
+      break;
+
+    default:
+      fprintf(stderr,
+              "We need code for %s numeric type %d\n",
+              field->name,
+              field->type);
+      abort();
+      break;
     }
   return retval;
   }
 
 static int
-is_alpha_a_number(const cblc_field_t *field,
-                  size_t offset,
-                  size_t size)
+classify_alphabetic_type( const cblc_field_t *field,
+                          size_t offset,
+                          size_t size,
+                          int (checker)( std::wint_t ch ))
   {
-  charmap_t *charmap = __gg__get_charmap(field->encoding);
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
   int retval = 1;
-  const unsigned char *bytes = (field->data + offset);
-  for( size_t i=0; i<size; i++ )
+  charmap_t *charmap = __gg__get_charmap(DEFAULT_32_ENCODING);
+  cbl_char_t space = charmap->mapped_character(ascii_space);
+  size_t nbytes_converted;
+  const char *converted = __gg__iconverter(field->encoding,
+                                           DEFAULT_32_ENCODING,
+                                           field->data+offset,
+                                           size,
+                                           &nbytes_converted);
+  size_t i=0;
+  while( i < nbytes_converted )
     {
-    unsigned char ch = bytes[i];
-    if(    (ch < mapped_0)
-        || (ch > mapped_9) )
+    cbl_char_t ch = charmap->getch(converted, &i);
+    if( !checker(ch) && ch != space )
       {
       retval = 0;
       break;
@@ -10188,171 +10910,43 @@ __gg__classify( classify_t type,
                 size_t size)
   {
   // The default answer is TRUE
-  int retval = 1;
-
-  const unsigned char *alpha = reinterpret_cast<unsigned char *>(field->data+offset);
-
-  size_t str_length = size;
-
-  const unsigned char *omega = alpha + str_length;
+  int retval;
 
-  if(alpha >= omega)
+  if( size == 0 )
     {
-    // If there is nothing there, then it can't be TRUE.  Can it?
+    // If there is nothing there, then it can't be TRUE.
     retval = 0;
     }
-
-  unsigned char ch;
-  switch(type)
+  else
     {
-    case ClassNumericType:
+    switch(type)
       {
-      switch( field->type )
-        {
-        case FldNumericEdited:
-          retval = is_numeric_edited_numeric(field, offset, size);
-          break;
-        case FldNumericDisplay:
-          retval = is_numeric_display_numeric(field, offset, size);
-          break;
-        case FldPacked:
-          retval = is_packed_numeric(field, offset, size);
-          break;
-        case FldGroup:
-        case FldAlphanumeric:
-        case FldAlphaEdited:
-          retval = is_alpha_a_number(field, offset, size);
-          break;
-
-        case FldNumericBinary:
-        case FldNumericBin5:
-          // These need to checked for fitting into field->digits
-          break;
-
-        default:
-          fprintf(stderr,
-                  "We need code for %s numeric type %d\n",
-                  field->name,
-                  field->type);
-          abort();
-          break;
-        }
+      case ClassNumericType:
+        retval = classify_numeric_type(field, offset, size);
+        break;
 
-      break;
-      }
+      case ClassAlphabeticType:
+        retval = classify_alphabetic_type(field, offset, size, std::iswalpha);
+        break;
 
-    case ClassAlphabeticType:
-      {
-      charmap_t *charmap = __gg__get_charmap(field->encoding);
-      int mapped_space = charmap->mapped_character(ascii_space);
-      int mapped_A = charmap->mapped_character(ascii_A);
-      int mapped_I = charmap->mapped_character(ascii_I);
-      int mapped_J = charmap->mapped_character(ascii_J);
-      int mapped_R = charmap->mapped_character(ascii_R);
-      int mapped_S = charmap->mapped_character(ascii_S);
-      int mapped_Z = charmap->mapped_character(ascii_Z);
-      int mapped_a = charmap->mapped_character(ascii_a);
-      int mapped_i = charmap->mapped_character(ascii_i);
-      int mapped_j = charmap->mapped_character(ascii_j);
-      int mapped_r = charmap->mapped_character(ascii_r);
-      int mapped_s = charmap->mapped_character(ascii_s);
-      int mapped_z = charmap->mapped_character(ascii_z);
-      while(alpha < omega)
-        {
-        ch = (*alpha++)&0xFF;
-        if( ch == mapped_space )
-          {
-          continue;
-          }
-        // If necessary, this could be sped up with the creation of
-        // appropriate mapping tables.
-
-        // The oddball construction of this if() statement is a consequence of
-        // EBCDIC.  Because of peculiarities going all the back to the encoding
-        // of characters on IBM cards, where it wasn't a good idea to have too
-        // many consecutive punches in a column because it would weaken the card
-        // to the point where its structural integrity might be threatened, the
-        // coding for the letter of the alphabet are not contiguous.
-        if(!(   (    ch >= mapped_A && ch <= mapped_I)
-                ||  (ch >= mapped_J && ch <= mapped_R)
-                ||  (ch >= mapped_S && ch <= mapped_Z)
-                ||  (ch >= mapped_a && ch <= mapped_i)
-                ||  (ch >= mapped_j && ch <= mapped_r)
-                ||  (ch >= mapped_s && ch <= mapped_z) ) )
-          {
-          // The character is not alphabetic
-          retval = 0;
-          break;
-          }
-        }
-      break;
-      }
+      case ClassLowerType:
+        retval = classify_alphabetic_type(field, offset, size, std::iswlower);
+        break;
 
-    case ClassLowerType:
-      {
-      charmap_t *charmap = __gg__get_charmap(field->encoding);
-      int mapped_space = charmap->mapped_character(ascii_space);
-      int mapped_a = charmap->mapped_character(ascii_a);
-      int mapped_i = charmap->mapped_character(ascii_i);
-      int mapped_j = charmap->mapped_character(ascii_j);
-      int mapped_r = charmap->mapped_character(ascii_r);
-      int mapped_s = charmap->mapped_character(ascii_s);
-      int mapped_z = charmap->mapped_character(ascii_z);
-      while(alpha < omega)
-        {
-        ch = *alpha++;
-        if( ch == mapped_space )
-          {
-          continue;
-          }
-        if(!(   (    ch >= mapped_a && ch <= mapped_i)
-                ||  (ch >= mapped_j && ch <= mapped_r)
-                ||  (ch >= mapped_s && ch <= mapped_z) ) )
-          {
-          retval = 0;
-          break;
-          }
-        }
-      break;
-      }
+      case ClassUpperType:
+        retval = classify_alphabetic_type(field, offset, size, std::iswupper);
+        break;
 
-    case ClassUpperType:
-      {
-      charmap_t *charmap = __gg__get_charmap(field->encoding);
-      int mapped_space = charmap->mapped_character(ascii_space);
-      int mapped_A = charmap->mapped_character(ascii_A);
-      int mapped_I = charmap->mapped_character(ascii_I);
-      int mapped_J = charmap->mapped_character(ascii_J);
-      int mapped_R = charmap->mapped_character(ascii_R);
-      int mapped_S = charmap->mapped_character(ascii_S);
-      int mapped_Z = charmap->mapped_character(ascii_Z);
-      while(alpha < omega)
-        {
-        ch = *alpha++;
-        if( ch == mapped_space )
-          {
-          continue;
-          }
-        if(!(   (    ch >= mapped_A && ch <= mapped_I)
-                ||  (ch >= mapped_J && ch <= mapped_R)
-                ||  (ch >= mapped_S && ch <= mapped_Z) ) )
-          {
-          retval = 0;
-          break;
-          }
-        }
-      break;
+      case ClassInvalidType:
+      case ClassDbcsType:
+      case ClassKanjiType:
+      default:
+        warnx("%s(): Don't know how to handle %s",
+              __func__,
+              classify_str(type));
+        abort();
+        break;
       }
-
-    case ClassInvalidType:
-    case ClassDbcsType:
-    case ClassKanjiType:
-    default:
-      warnx("%s(): Don't know how to handle %s",
-            __func__,
-            classify_str(type));
-      abort();
-      break;
     }
 
   return retval;
@@ -10365,15 +10959,18 @@ __gg__convert_encoding( char *psz,
                         cbl_encoding_t to )
   {
   // This does an in-place conversion of psz
+  charmap_t *charmap_from = __gg__get_charmap(from);
+  const charmap_t *charmap = __gg__get_charmap(to);
   if( from > custom_encoding_e )
     {
     size_t charsout;
     const char *converted  = __gg__iconverter(from,
                                               to,
                                               psz,
-                                              strlen(psz),
+                                              charmap_from->strlen(psz),
                                               &charsout);
-    strcpy(psz, converted);
+    // Copy over the converted string, including the final NUL
+    memcpy(psz, converted, charsout + charmap->stride());
     }
   }
 
@@ -10406,36 +11003,49 @@ accept_envar( cblc_field_t  *tgt,
               cbl_encoding_t encoding)
   {
   int retval = 1; // 1 means we couldn't find it
+
   if( psz_name )
     {
-    tgt_length = tgt_length  ? tgt_length  : tgt->capacity;
-
-    // Pick up the environment variable name
-    char *env = strdup(psz_name);
-    massert(env);
-
+    charmap_t *charmap = __gg__get_charmap(encoding);
+    size_t psz_name_length = charmap->strlen(psz_name);
+
+    // convert psz_name to the console encoding:
+    size_t converted_length;
+    const char *converted = __gg__iconverter(encoding,
+                                       __gg__console_encoding,
+                                       psz_name,
+                                       psz_name_length,
+                                       &converted_length);
+    // Copy converted, because brute_force_trim uses charmap_t:
+    char *env = strdup(converted);
     // Get rid of leading and trailing space characters:
-    char *trimmed_env = brute_force_trim( env,
-                                          encoding );
+    const char *trimmed_env = brute_force_trim( env,
+                                          __gg__console_encoding );
 
-    // Convert the name to the console codeset:
-    __gg__convert_encoding( trimmed_env,
-                            encoding,
-                            DEFAULT_SOURCE_ENCODING);
-
-    // Pick up the environment variable, and convert it to the internal codeset
+    // Pick up the environment variable
     const char *p = getenv(trimmed_env);
+    free(env);
     if(p)
       {
-      retval = 0; // Okay
-      move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_SOURCE_ENCODING);
+      retval = 0; // We found the environment variable:
+      // Convert it to the target encoding:
+      converted = __gg__iconverter(__gg__console_encoding,
+                                   tgt->encoding,
+                                   p,
+                                   strlen(p),
+                                   &converted_length);
+      __gg__field_from_string(tgt, tgt_offset, tgt_length,
+                              converted, converted_length);
+      }
+    else
+      {
+      // Leave the target unchanged, as per spec.
       }
-    free(env);
     }
 
   if( retval == 1 )
     {
-    // Could't find it
+    // Could't find that environment variable
     exception_raise(ec_argument_imp_environment_e);
     }
 
@@ -10451,10 +11061,12 @@ __gg__accept_envar( cblc_field_t *tgt,
                     size_t        name_offset,
                     size_t        name_length)
   {
-  //  We need the name to be nul-terminated:
-  char *p = static_cast<char *>(malloc(name_length + 1));
+  // We need the name to be nul-terminated, so we will tack on four extra
+  // nulls to handle characters up to 32 bits wide
+  char *p = static_cast<char *>(malloc(name_length + width_of_utf32));
   massert(p);
   memcpy(p, name->data+name_offset, name_length);
+  memset(p + name_length, 0, width_of_utf32);
   p[name_length] = '\0';
   int retval = accept_envar(tgt,
                             tgt_offset,
@@ -10600,7 +11212,15 @@ __gg__get_argc(cblc_field_t *dest, size_t offset, size_t length)
   command_line_plan_b();
   char ach[128];
   sprintf(ach, "%d", stashed_argc);
-  move_string(dest, offset, length, ach, __gg__console_encoding);
+  size_t nbytes;
+  char *converted = __gg__miconverter(__gg__console_encoding,
+                                            dest->encoding,
+                                            ach,
+                                            strlen(ach),
+                                            &nbytes );
+  __gg__field_from_string(dest, offset, length, converted, nbytes);
+  __gg__adjust_dest_size(dest, nbytes);
+  free(converted);
   }
 
 extern "C"
@@ -10635,11 +11255,15 @@ __gg__get_argv( cblc_field_t *dest,
     }
   else
     {
-    move_string(dest,
-                dest_offset,
-                dest_length,
-                stashed_argv[N],
-                DEFAULT_SOURCE_ENCODING);
+    size_t nbytes;
+    char *converted = __gg__miconverter(__gg__console_encoding,
+                                        dest->encoding,
+                                        stashed_argv[N],
+                                        strlen(stashed_argv[N]),
+                                        &nbytes );
+    __gg__field_from_string(dest, dest_offset, dest_length, converted, nbytes);
+    __gg__adjust_dest_size(dest, nbytes);
+    free(converted);
     retcode = 0;  // Okay
     }
   return retcode;
@@ -10676,7 +11300,15 @@ __gg__get_command_line( cblc_field_t *field,
   if( *retval )
     {
     flength = flength ? flength : field->capacity;
-    move_string(field, offset, flength, retval, __gg__console_encoding);
+    size_t nbytes;
+    char *converted = __gg__miconverter(__gg__console_encoding,
+                                        field->encoding,
+                                        retval,
+                                        strlen(retval),
+                                        &nbytes );
+    __gg__field_from_string(field, offset, flength, converted, nbytes);
+    __gg__adjust_dest_size(field, nbytes);
+    free(converted);
     retcode = 0; // Okay
     }
   else
@@ -10847,7 +11479,7 @@ __gg__parser_set_conditional(cblc_field_t *var, int figconst_)
 
   cbl_figconst_t figconst = (cbl_figconst_t)figconst_;
 
-  unsigned char special = charmap->mapped_character(ascii_space);
+  cbl_char_t special = charmap->mapped_character(ascii_space);
   switch(figconst)
     {
     case space_value_e:
@@ -10868,7 +11500,7 @@ __gg__parser_set_conditional(cblc_field_t *var, int figconst_)
     default:
       break;
     }
-  memset( var->data, special, var->capacity);
+  charmap->memset( var->data, special, var->capacity);
   }
 
 extern "C"
@@ -11038,7 +11670,7 @@ __gg__assign_value_from_stack(cblc_field_t *dest, __int128 parameter)
 
 extern "C"
 int
-__gg__literaln_alpha_compare(const char         *left_side,
+__gg__literaln_alpha_compare(      char         *left_side,
                              const cblc_field_t *right,
                              size_t              offset,
                              size_t              length,
@@ -11050,45 +11682,19 @@ __gg__literaln_alpha_compare(const char         *left_side,
     length = right->capacity;
     }
 
+  cbl_encoding_t right_encoding = right->encoding;
+  if( right->attr & hex_encoded_e )
+    {
+    right_encoding = iconv_CP1252_e;
+    }
   retval = compare_strings(   left_side,
                               strlen(left_side),
                               false,
                               reinterpret_cast<char *>((right->data + offset)),
                               length,
                               !!(flags & REFER_T_MOVE_ALL),
-                              right->encoding);
-  return retval;
-  }
-
-static char *
-string_in(      char *str,
-          const char *str_e,
-          const char *frag,
-          const char *frag_e)
-  {
-  // This simple routine could be improved.  Instead of using memcmp, we could
-  // use established, albeit complex, techniques of string searching:
-
-  // Looking for "abcde" in "abcdabcde", for example.  One could notice that
-  // starting at the first 'a' results in a mismatch at the second 'a'.  There
-  // is thus no need to start the second search at the first 'b' in the searched
-  // string; one could jump ahead to the second 'a' and continue from there.
-
-  // Feel free.  It won't matter in the real world; a program whose innermost
-  // loop is an UNSTRING is difficult to imagine.  But feel free.
-
-  char *retval = NULL;
-  size_t nchars = frag_e - frag;
-  char *p = str;
-  while( p + nchars <= str_e )
-    {
-    if( memcmp(p, frag, nchars) == 0 )
-      {
-      retval = p;
-      break;
-      }
-    p += 1;
-    }
+                              right_encoding,
+                              right_encoding);
   return retval;
   }
 
@@ -11137,11 +11743,16 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
   // Initialize the state variables
   int overflow = 0;
   int tally = 0;
-  int pointer = 1;
+  size_t pointer = 1;
   size_t nreceiver;
-  char *left  = NULL;
-  char *right = NULL;
-  int previous_delimiter;
+  size_t left;
+  size_t right;
+
+  std::u32string str_id1;
+  std::vector<std::u32string> delimiters;
+
+  const charmap_t *charmap_id1 = __gg__get_charmap(id1->encoding);
+  int stride_id1 = charmap_id1->stride();
 
   if( id8  )
     {
@@ -11155,10 +11766,16 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
   if( id7 )
     {
     int rdigits;
-    pointer = (int)__gg__binary_value_from_qualified_field(&rdigits,
+    int p = (int)__gg__binary_value_from_qualified_field(&rdigits,
                                                          id7,
                                                          id7_o,
                                                          id7_s);
+    if( p < 1 )
+      {
+      overflow = 1;
+      goto done;
+      }
+    pointer = p;
     }
 
   // As per the spec, if the string is zero-length; we are done.
@@ -11169,46 +11786,66 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
 
   // As per the spec, we have an overflow condition if pointer is out of
   // range:
-  if( pointer < 1 || pointer > (int)id1_s )
+  if( pointer > id1_s/stride_id1 )
     {
     overflow = 1;
     goto done;
     }
+  // pointer is one-based throughout; don't forget that
 
-  left  = reinterpret_cast<char *>(id1->data+id1_o) + pointer-1;
-  right = reinterpret_cast<char *>(id1->data+id1_o) + id1_s;
+  /* I thought long and hard about converting things to UTF32 for UNSTRING. It
+     was not obviously necessary.  But, darn it all, sooner or later somebody
+     is going to demand UTF-8 capability and I can't think of any obvious way
+     of being able to handle multibyte codepoints as single characters without
+     doing something like converting to UTF32.  */
 
+  str_id1 = normalize_for_inspect_format_4( id1,
+                                            id1_o,
+                                            id1_s,
+                                            id1->encoding);
+  left = pointer-1;
+  right = str_id1.size();
   if( ndelimiteds == 0 )
     {
     // There are no DELIMITED BY identifier-2 values, so we just peel off
     // characters from identifier-1 and put them into each identifier-4:
-    for( size_t i=0; i<nreceivers; i++ )
+    for( size_t receiver=0; receiver<nreceivers; receiver++ )
       {
       if( left >= right )
         {
+        // We have run out of input characters.
         break;
         }
-      size_t id_4_size = id4_s[i];
-      if( id4[i]->attr & separate_e )
+      // We will peel off enough characters to fit the receiving id4:
+      size_t id_4_size = id4_s[receiver]/stride_id1;
+      if( id4[receiver]->attr & separate_e )
         {
-        // The receiver is NumericDisplay with a separate signe
-        id_4_size = id4_s[i] - 1;
+        // The receiver is NumericDisplay with a separate sign, so, as per
+        // the spec, we reduce the size by one character.
+        id_4_size = id4_s[receiver] - 1;
         }
 
-      // Make sure id_4_size doesn't move past the end of the universe
+      // Make sure id_4_size doesn't take us past the end of the universe
       if( left + id_4_size > right )
         {
         id_4_size = right - left;
         }
 
-      // Move the data into place:
-      move_string(id4[i],
-                  id4_o[i],
-                  id4_s[i],
-                  left,
-                  id1->encoding,
-                  id_4_size);
-
+      // Convert the specified str_id1 characters back to id1->encoding.
+      size_t bytes_converted;
+      const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
+                                               id1->encoding,
+                                               &str_id1[left],
+                                               (right-left)*width_of_utf32,
+                                               &bytes_converted );
+      char *duped = static_cast<char *>(__gg__memdup(converted, bytes_converted));
+      // Put the converted string into place:
+      __gg__field_from_string(id4[receiver],
+                        id4_o[receiver],
+                        id4_s[receiver],
+                        duped,
+                        bytes_converted);
+      free(duped);
       // Update the state variables:
       left += id_4_size;
       pointer += id_4_size;
@@ -11219,170 +11856,125 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
 
   // Arriving here means there is some number of ndelimiteds
 
+  // Convert them to the same encoding as str_id1:
+  for( size_t i=0; i<ndelimiteds; i++ )
+    {
+    std::u32string delimiter
+        = normalize_for_inspect_format_4(id2[i],
+                                         id2_o[i],
+                                         id2_s[i],
+                                         id1->encoding);
+    delimiters.push_back(delimiter);
+    }
+
   nreceiver = 0;
-  previous_delimiter = -1;
   while( left < right )
     {
-    // Starting at 'left', see if we can find any of the delimiters
-    char *leftmost_delimiter = NULL;
-    int ifound = -1;
-    cbl_figconst_t figconst;
-    char achfigconst[1];
-    cbl_encoding_t fig_encoding;
+    // Starting at 'left', see if we can find any of the delimiters.  For each
+    // 'left' position, we look through all of the delimiters,
+
+    int    best_delimiter = -1;
+    size_t best_leftmost = right; // This is the location of the start of ALL
+    size_t best_location = right; // This is the location of the last of ALL
     for( size_t i=0; i<ndelimiteds; i++ )
       {
-      fig_encoding = id1->encoding;
-      charmap_t *charmap = __gg__get_charmap(fig_encoding);
-      char *pfound;
-      figconst = (cbl_figconst_t)(id2[i]->attr  & FIGCONST_MASK);
-
-      switch(figconst)
-        {
-        case low_value_e    :
-          achfigconst[0] = charmap->figconst_character(figconst);
-          pfound = string_in( left,
-                              right,
-                              achfigconst,
-                              achfigconst+1);
-          break;
-
-        case zero_value_e   :
-          achfigconst[0] = charmap->figconst_character(figconst);
-          pfound = string_in( left,
-                              right,
-                              achfigconst,
-                              achfigconst+1);
-          break;
-
-        case space_value_e  :
-          achfigconst[0] = charmap->figconst_character(figconst);
-          pfound = string_in( left,
-                              right,
-                              achfigconst,
-                              achfigconst+1);
-          break;
-
-        case quote_value_e  :
-          achfigconst[0] = charmap->figconst_character(figconst);
-          pfound = string_in( left,
-                              right,
-                              achfigconst,
-                              achfigconst+1);
-          break;
-
-        case high_value_e   :
-          achfigconst[0] = charmap->figconst_character(figconst);
-          pfound = string_in( left,
-                              right,
-                              achfigconst,
-                              achfigconst+1);
-          break;
-
-        case normal_value_e :
-        default:
-          pfound = string_in( left,
-                              right,
-                              reinterpret_cast<char *>(id2[i]->data+id2_o[i]),
-                              reinterpret_cast<char *>((id2[i]->data+id2_o[i])
-                                                                 + id2_s[i]));
-          break;
-        }
-
-      if( pfound )
+      std::u32string str_id2 = delimiters[i];
+      size_t nfound = str_id1.find(str_id2, left);
+      if( nfound != std::u32string::npos )
         {
         // We found a delimiter
-        if( !leftmost_delimiter || pfound < leftmost_delimiter )
+        if( nfound > best_leftmost )
           {
-          ifound = i;
-          leftmost_delimiter = pfound;
+          // This delimiter lives to the right of the best one we found so far.
+          // Ignore it, and proceed to the next delimiter.
+          continue;
           }
-        }
-      }
+        // This delimiter is the leftmost we've seen so far:
+        best_delimiter = i;
+        best_leftmost  = nfound;
+        best_location  = nfound;
 
-    if(    ifound >= 0
-        && leftmost_delimiter == left
-        && ifound == previous_delimiter )
-      {
-      // We found another instance of an ALL delimiter.
-      // So, we just skip it.
-      left    += id2_s[previous_delimiter];
-      pointer += id2_s[previous_delimiter];
-      continue;
+        if( all_flags[i] == ascii_1 )
+          {
+          // This delimiter is flagged as ALL, so we need to see if we have
+          // a flock of them:
+          size_t next = nfound + str_id2.size() ;
+          while( str_id1.find(str_id2, next ) == next )
+            {
+            // We found another consecutive one at next:
+            best_location = next;
+            next += str_id2.size();
+            }
+          }
+        }
       }
 
-    // We did not re-find an ALL DELIMITER
-    previous_delimiter = -1;
-
     // If we've used up all receivers, we bail at this point
     if( nreceiver >= nreceivers )
       {
       break;
       }
 
-    if( ifound >= 0 && all_flags[ifound] == ascii_1 )
-      {
-      // Arriving here means we found a new delimiter.
-      // If the ALL flag was on, set up to notice repeats
-      previous_delimiter = ifound;
-      }
-
-    if( !leftmost_delimiter )
+    if( best_delimiter == -1 )
       {
       // We were unable to find a delimiter, so we eat up the remainder
       // of the sender:
-      leftmost_delimiter = right;
+      best_leftmost = right;
+      best_location = right;
       }
 
     // Apply what we have learned to the next receiver:
 
-    size_t examined = leftmost_delimiter - left;
-
-    // Move the data into place:
-    move_string(id4[nreceiver],
-                id4_o[nreceiver],
-                id4_s[nreceiver],
-                left,
-                id1->encoding,
-                examined);
-
-    // Update the left pointer
-    left = leftmost_delimiter;
-    if( ifound >= 0 )
-      {
-      // And skip over the delimiter
-      left += id2_s[ifound];
-      }
-
+    size_t examined = best_leftmost - left;
+
+    // Convert the data from left to leftmost_delimiter back to encoding of
+    // id1:
+    size_t bytes_converted;
+    const char *converted = __gg__iconverter(
+                           DEFAULT_32_ENCODING,
+                           id1->encoding,
+                           &str_id1[left],
+                           (best_leftmost-left)*width_of_utf32,
+                           &bytes_converted );
+    char *duped = static_cast<char *>(__gg__memdup(converted, bytes_converted));
+    // Put the converted string into place:
+    __gg__field_from_string(id4[nreceiver],
+                      id4_o[nreceiver],
+                      id4_s[nreceiver],
+                      duped,
+                      bytes_converted);
+    free(duped);
+    // Update the left edge
+    left = best_location + (best_delimiter > -1
+                            ? delimiters[best_delimiter].size()
+                            : 0) ;
     if( id5[nreceiver] )
       {
-      if( ifound >= 0 )
-        {
-        if( figconst )
-          {
-          move_string(id5[nreceiver],
-                      id5_o[nreceiver],
-                      id5_s[nreceiver],
-                      achfigconst,
-                      fig_encoding,
-                      1);
-          }
-        else
-          {
-          move_string(id5[nreceiver],
-                      id5_o[nreceiver],
-                      id5_s[nreceiver],
-                      reinterpret_cast<char *>(id2[ifound]->data+id2_o[ifound]),
-                      id2[ifound]->encoding,
-                      id2_s[ifound]);
-          }
+      // The caller wants to know what the delimiter was:
+      if( best_delimiter > -1 )
+        {
+        converted = __gg__iconverter(
+                             DEFAULT_32_ENCODING,
+                             id1->encoding,
+                             delimiters[best_delimiter].data(),
+                             delimiters[best_delimiter].size()*width_of_utf32,
+                             &bytes_converted );
+        duped = static_cast<char *>(__gg__memdup(converted, bytes_converted));
+        __gg__field_from_string(id5[nreceiver],
+                          id5_o[nreceiver],
+                          id5_s[nreceiver],
+                          duped,
+                          bytes_converted);
+        free(duped);
         }
       else
         {
-        move_string(id5[nreceiver],
-                    id5_o[nreceiver],
-                    id5_s[nreceiver],
-                    "",
-                    DEFAULT_SOURCE_ENCODING);
+        // We didn't find a delimiter
+        __gg__field_from_string(id5[nreceiver],
+                          id5_o[nreceiver],
+                          id5_s[nreceiver],
+                          "",
+                          0);
         }
       }
 
@@ -11400,9 +11992,9 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
     // Update the state variables:
     tally += 1;
     nreceiver += 1;
-    if( ifound >= 0 )
+    if( best_delimiter > -1  )
       {
-      pointer += examined + id2_s[ifound];
+      pointer = left+1 ;
       }
     }
 
@@ -12023,7 +12615,6 @@ __gg__integer_from_float128(const cblc_field_t *field)
   return (__int128)fvalue;
   }
 
-
 extern "C"
 void
 __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount)
@@ -12032,7 +12623,9 @@ __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount)
     {
     if( dest->allocated < ncount )
       {
-      fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): Adjusting size upward is not possible.\n");
+      fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): "
+                      "Adjusting %s size upward is not possible.\n",
+                      dest->name);
       abort();
 //      dest->allocated = ncount;
 //      dest->data = (unsigned char *)realloc(dest->data, ncount);
@@ -12716,7 +13309,7 @@ __gg__just_mangle_name( const cblc_field_t  *field,
   // We need ach_name to be in ASCII:
   size_t charsout;
   const char *converted = __gg__iconverter(field->encoding,
-                                           DEFAULT_SOURCE_ENCODING,
+                                           __gg__console_encoding,
                                            PTRCAST(char, field->data),
                                            length,
                                            &charsout);
@@ -12812,6 +13405,12 @@ __gg__function_handle_from_name(int                 program_id,
                                            length,
                                            &charsout);
   memcpy(ach_name, converted, length);
+  char *p = strchr(ach_name, ascii_space);
+  if(p)
+    {
+    *p = '\0';
+    }
+  length = strlen(ach_name);
 
   // At this point we have a null-terminated ascii function name.
 
@@ -13122,7 +13721,7 @@ get_the_byte(cblc_field_t *field)
     {
     // Get the encoded character associated with the figconst
     retval = __gg__fc_char(field);
-    if(retval == -1)
+    if(retval == NOT_A_CHARACTER)
       {
       retval = (int)(unsigned char)__gg__get_integer_binary_value(field);
       }
@@ -13726,9 +14325,312 @@ __gg__fc_char(const cblc_field_t *field)
   {
   // This returns the figconst character for a field, if the field->attr
   // indicates that the field is a figconst.  Otherwise, it comes back -1
-  int retval = -1;
+  int retval = NOT_A_CHARACTER;
   charmap_t *charmap = __gg__get_charmap(field->encoding);
   cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
-  retval = charmap->figconst_character(figconst);
+  if( figconst )
+    {
+    retval = charmap->figconst_character(figconst);
+    }
+  return retval;
+  }
+
+extern "C"
+void
+__gg__refer_from_string(cblc_field_t *field,
+                         size_t field_offset,
+                         size_t field_size,
+                   const char *string)
+  {
+  // 'string' has to be in the 'field' encoding.  Use this when the input
+  // might, or might not, be nul-terminated, and you don't want a
+  // nul-terminator in the data of the target field.
+  charmap_t *charmap = __gg__get_charmap(field->encoding);
+  size_t nbytes = charmap->strlen(string, field_size);
+  __gg__field_from_string(field, field_offset, field_size, string, nbytes);
+  }
+
+extern "C"
+void
+__gg__refer_from_psz(cblc_field_t *field,
+                     size_t field_offset,
+                     size_t field_size,
+               const char *string)
+  {
+  // 'string' has to be in the 'field' encoding.  Use this when the input
+  // might, or might not, be nul-terminated, and you *do* want a
+  // nul-terminator in the data of the target field if there was one in the
+  // input.
+
+  // One typical use is processing returned values from external C-style
+  // functions, which often return nul-terminated strings.
+  charmap_t *charmap = __gg__get_charmap(field->encoding);
+  size_t nbytes = charmap->strlen(string, field_size);
+  __gg__field_from_string(field,
+                    field_offset,
+                    field_size,
+                    string,
+                    nbytes);
+  }
+
+
+extern "C"
+void
+__gg__find_string(        cblc_field_t *dest,
+                    const cblc_field_t *haystack,
+                          size_t        haystack_o,
+                          size_t        haystack_s,
+                    const cblc_field_t *needle,
+                          size_t        needle_o,
+                          size_t        needle_s,
+                    const cblc_field_t *after,
+                          size_t        after_o,
+                          size_t        after_s,
+                          bool          last,
+                          bool          anycase)
+  {
+  int retval = 0;
+  cbl_encoding_t encoding = dest->encoding;
+  std::u32string str_id1 = normalize_for_inspect_format_4(
+                                                  haystack,
+                                                  haystack_o,
+                                                  haystack_s,
+                                                  encoding);
+  std::u32string str_id2 = normalize_for_inspect_format_4(
+                                                  needle,
+                                                  needle_o,
+                                                  needle_s,
+                                                  encoding);
+  if( !str_id1.empty() && !str_id2.empty() )
+    {
+    if( anycase )
+      {
+      std::transform( str_id1.begin(),
+                      str_id1.end(),
+                      str_id1.begin(),
+                      std::towlower);
+      std::transform( str_id2.begin(),
+                      str_id2.end(),
+                      str_id2.begin(),
+                      std::towlower);
+      }
+
+    // This is the count of how many to skip before returning an answer:
+    int after_count = 1;
+    size_t search_position;
+    if( last )
+      {
+      // We will search from right to left:
+      search_position = str_id1.size();
+      }
+    else
+      {
+      // We will search from left to right:
+      search_position = 0;
+      }
+
+    if( after )
+      {
+      int rdigits;
+      after_count = static_cast<int>(get_binary_value_local(&rdigits,
+                                                           after,
+                                                           after->data+after_o,
+                                                           after_s));
+      after_count += 1;
+      }
+    while( after_count-- >= 1 )
+      {
+      if( !last )
+        {
+        // We are searching from left to right
+        search_position = str_id1.find(str_id2, search_position);
+        if( search_position == std::u32string::npos )
+          {
+          // Alas, our search was fruitless
+          retval = 0;
+          break;
+          }
+        search_position += 1;
+        if( after_count == 0 )
+          {
+          // This was the find we were looking for!
+          // COBOL positions are 1-based positions, not zero-based offsets:
+          retval = search_position;
+          break;
+          }
+        }
+      else
+        {
+        // We are searching from right_to)left
+        search_position = str_id1.rfind(str_id2, search_position);
+        if( search_position == std::u32string::npos )
+          {
+          // Alas, our search was fruitless
+          break;
+          }
+        if( after_count == 0 )
+          {
+          // This was the find we were looking for!
+          // COBOL positions are 1-based positions, not zero-based offsets:
+          retval = search_position + 1;
+          break;
+          }
+        if( search_position == 0)
+          {
+          // There's no point in continuing the search leftwardsspast the
+          // left edge, and if we subtract 1 from the size_t search_position,
+          // we are not going to be happy with the result.
+          break;
+          }
+        search_position -= 1;
+        }
+      }
+    }
+  // Set the return value:
+  __gg__int128_to_field(dest,
+                        retval,
+                        NO_RDIGITS,
+                        truncation_e,
+                        NULL);
+  }
+
+static
+char *
+convert_for_convert(      cbl_encoding_t dest_enc,
+                    const cblc_field_t *input,
+                          size_t        input_o,
+                          size_t        input_s,
+                          size_t       *nbytes)
+  {
+  // iconverter takes care of untranslateable characters.
+  char *retval = __gg__miconverter(input->encoding,
+                                   dest_enc,
+                                   input->data + input_o,
+                                   input_s,
+                                   nbytes);
   return retval;
   }
+
+extern "C"
+void
+__gg__convert(cblc_field_t *dest,
+        const cblc_field_t *input,
+              size_t        input_o,
+              size_t        input_s,
+              int           /*source_format*/,
+              int           dest_format)
+  {
+  /* convert formulations: 
+   *  1. ANY to ALNUM HEX, or NAT HEX
+   *  2. HEX to BYTE
+   *  3. ALNUM to NAT, ALNUM HEX, or NAT HEX
+   *  4. NAT to ALNUM, ALNUM HEX, or NAT HEX
+   */
+
+  /* enum convert_type_t
+   *convert_alpha_e      = 0x01,
+   *convert_nat_e        = 0x02,
+   *convert_any_e        = 0x03, // i.e., both
+   *convert_byte_e       = 0x04,
+   *convert_hex_e        = 0x08, // may be combined with alpha or national
+   *convert_just_bit_e   = 0x10, 
+   *convert_just_e       = 0x18, // combined with HEX
+   *convert_rjust_bit_e  = 0x20, 
+   *convert_rjust_e      = 0x38, // combined with JUSTIFY
+   */
+  cbl_encoding_t tgt_enc = (dest_format & convert_nat_e) 
+                         ? __gg__national_encoding
+                         : __gg__display_encoding;
+  const charmap_t *charmap_tgt = __gg__get_charmap(tgt_enc);
+
+  charmap_t *charmap_dest = __gg__get_charmap(dest->encoding);
+  
+  if( dest_format & convert_hex_e )
+    {
+    size_t nbytes;
+    char *converted = convert_for_convert(tgt_enc,
+                                          input,
+                                          input_o,
+                                          input_s,
+                                          &nbytes);
+    // We output 'converted' as a stream of hexadecimal characters in the
+    // destination encoding:
+    size_t i = 0;
+    size_t d = 0;
+    while(i < nbytes && d < dest->capacity )
+      {
+      cbl_char_t byte = charmap_tgt->getch(converted, &i);
+      unsigned char hi = byte>>4;
+      hi += hi < 10 ? ascii_0 : ascii_A-10;
+      char lo = byte & 0x0F;
+      lo += lo < 10 ? ascii_0 : ascii_A-10;
+      charmap_dest->putch(charmap_dest->mapped_character(hi), dest->data, &d);
+      charmap_dest->putch(charmap_dest->mapped_character(lo), dest->data, &d);
+      }
+    free(converted);
+    __gg__adjust_dest_size(dest, d);
+    }
+  else if( dest_format == convert_byte_e )
+    {
+    // The input is a series of hexadecimal characters
+    size_t nbytes;
+    char *converted = __gg__miconverter( input->encoding,
+                                         iconv_CP1252_e,
+                                         input->data+input_o,
+                                         input_s,
+                                         &nbytes);
+    size_t i = 0;
+    size_t d = 0;
+    while(i < nbytes && d < dest->capacity )
+      {
+      // Each character is part of a string of hexadecimal digits.  So, the
+      // idea is that A1 should be turned into 1010.0001.  There is no
+      // guarantee that these characters actually are hexadecimal.
+      cbl_char_t nybble = charmap_tgt->getch(converted, &i);
+      if( nybble >= ascii_0 && nybble <= ascii_9 )
+        {
+        nybble -= ascii_0;
+        }
+      else if( nybble >= ascii_a && nybble <= ascii_f )
+        {
+        nybble -= ascii_a - 10;
+        }
+      else if( nybble >= ascii_A && nybble <= ascii_F )
+        {
+        nybble -= ascii_A - 10;
+        }
+      else
+        {
+        nybble = 0;
+        }
+
+      for(int j=0; j<4; j++)
+        {
+        if( nybble & 0x08 )
+          {
+          charmap_dest->putch(ascii_1, dest->data, &d);
+          }
+        else
+          {
+          charmap_dest->putch(ascii_0, dest->data, &d);
+          }
+        nybble <<= 1;
+        }
+      }
+    free(converted);
+    __gg__adjust_dest_size(dest, d);
+    }
+  else
+    {
+    size_t nbytes;
+    char *converted = convert_for_convert(dest->encoding,
+                                          input,
+                                          input_o,
+                                          input_s,
+                                          &nbytes);
+    size_t len = std::min(nbytes, dest->capacity);
+    memcpy(dest->data, converted, len);
+    free(converted);
+    __gg__adjust_dest_size(dest, len);
+    }
+  }
index 9fe6bf2a524939c67f020eac9a79c525eb7bbd0c..e4d0ab9fd78a7ae6e867cfd0aba29168303509da 100644 (file)
@@ -59,17 +59,17 @@ extern "C" __int128 __gg__dirty_to_binary(const char *dirty,
 extern "C" __int128 __gg__binary_value_from_field(  int *rdigits,
                                                     cblc_field_t *var);
 
-extern "C" int __gg__compare_2( cblc_field_t *left_side,
-                                unsigned char   *left_location,
-                                size_t  left_length,
-                                int     left_attr,
-                                int     left_flags,
-                                cblc_field_t *right_side,
-                                unsigned char   *right_location,
-                                size_t  right_length,
-                                int     right_attr,
-                                int     right_flags,
-                                int     second_time_through);
+extern "C" int __gg__compare_2( cblc_field_t  *left_side,
+                                unsigned char *left_location,
+                                size_t         left_length,
+                                uint64_t       left_attr,
+                                int            left_flags,
+                                cblc_field_t  *right_side,
+                                unsigned char *right_location,
+                                size_t         right_length,
+                                uint64_t       right_attr,
+                                int            right_flags,
+                                int            second_time_through);
 extern "C" void __gg__int128_to_field(cblc_field_t   *tgt,
                                       __int128        value,
                                       int             source_rdigits,
@@ -144,4 +144,17 @@ void __gg__convert_encoding_length(char *pch,
 
 const unsigned short *__gg__current_collation();
 
+// Warning:  field_from_string uses charmap_t, so you can't safely feed it
+// the results of __gg__iconverter without copying them.
+extern "C"
+void __gg__field_from_string( cblc_field_t *field,
+                              size_t field_o,
+                              size_t field_s,
+                        const char *string,
+                              size_t string_length);
+extern "C"
+void *__gg__memdup(const void *p, size_t size);
+
+enum {width_of_utf32 = 4};
+
 #endif
index 335f205068b49ad24533cb8841a4fb4db351d5bb..f3c8f911285222def19a75647f43971bf025e396 100644 (file)
@@ -1,18 +1,37 @@
+.SUFFIXES: .scr .cbl
+
+ROOT = $(shell git rev-parse --show-toplevel)
+
 #
 # Demonstrate how to generate a new COBOL binding from a man page. 
 #
 
+SCRAPE =  $(ROOT)/libgcobol/posix/bin/scrape.awk
+UDF.GEN = $(ROOT)/libgcobol/posix/bin/udf-gen
+
+posix-funcs:
+       test "$(FUNCS)"
+       for F in $(FUNCS); \
+               do man 2 $$F | col -b | $(SCRAPE) > posix-$$F.scr; \
+               $(MAKE) -f $(ROOT)/libgcobol/posix/bin/Makefile posix-$${F}.cbl; done
+
+posix-$(FUNC).cbl:
+       man 2 $(FUNC) | col -b | $(SCRAPE) | \
+       $(UDF.GEN) -D mode_t=unsigned\ long  > $@~
+       @mv $@~ $@
+
 posix-mkdir.cbl:
-       man 2 mkdir | ./scrape.awk | \
-       ../udf-gen -D mode_t=unsigned\ long  > $@~
+       man 2 mkdir | col -b | $(SCRAPE) | \
+       $(UDF.GEN) -D mode_t=unsigned\ long  > $@~
        @mv $@~ $@
 
 # ... or 
 
 posix-stat-many.scr:
-       man 2 stat | col -b | ./scrape.awk > $@~
+       man 2 stat | col -b | $(SCRAPE) > $@~
        @mv $@~ $@
 
-.scr.cbl:
-       ./udf-gen -D mode_t=unsigned\ long  $^ > $@~
+%.cbl : %.scr
+       test -s $^
+       $(UDF.GEN) $(CPPFLAGS)  $^ > $@~
        @mv $@~ $@
index 4d244d0ee3dbd4de6b9a0b6da50e37f9428521a4..ba83146b883c395a4a7d2f13a7da5c8a0edf60a7 100755 (executable)
@@ -12,6 +12,7 @@
   exit
 }
 
+# Print lines that end in dots, a comma, a brace, or a semicolon.
 /SYNOPSIS/,/DESCRIPTION/ {
   if( /([.][.]|[{},;]) *$/ ) {
     print
index 4ad9f7fffe75216cd9c5f5911225a5cb9631e0c7..35c8caba2684fa056fbaad9b5234a75845923c81 100755 (executable)
@@ -30,6 +30,7 @@
 
 import sys, os, getopt, re, copy
 from pycparser import c_parser, c_generator, c_ast, parse_file
+from pycparser.plyparser import ParseError
 
 def starify(param):
     stars = ""
@@ -283,7 +284,11 @@ for var in ('CPATH', 'C_INCLUDE_PATH'):
         cpp_args = ''
 
 def process(srcfile):
-    ast = parse_file(srcfile, use_cpp=True, cpp_args=cpp_args)
+    try:
+        ast = parse_file(srcfile, use_cpp=True, cpp_args=cpp_args)
+    except ParseError as oops:
+        print(oops, file=sys.stderr)
+        sys.exit(1)
     # print(c_generator.CGenerator().visit(ast))
     v = VisitPrototypes()
     v.visit(ast)
diff --git a/libgcobol/posix/cpy/psx-lseek.cpy b/libgcobol/posix/cpy/psx-lseek.cpy
new file mode 100644 (file)
index 0000000..e53e071
--- /dev/null
@@ -0,0 +1,14 @@
+        >> PUSH source format
+        >>SOURCE format is fixed
+
+      * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+      *  This file is in the public domain.
+      *  Contributed by James K. Lowden of Cobolworx in November 2025.
+      * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+        >>DEFINE SEEK_SET      AS     2
+        >>DEFINE SEEK_CUR      AS     4
+        >>DEFINE SEEK_END      AS     8
+
+        >> POP source format
+
diff --git a/libgcobol/posix/shim/lseek.cc b/libgcobol/posix/shim/lseek.cc
new file mode 100644 (file)
index 0000000..52407ed
--- /dev/null
@@ -0,0 +1,31 @@
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cassert>
+#include <map>
+
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+
+extern "C" {
+
+off_t 
+posix_lseek(int fd, off_t offset, int whence) {
+
+  static const std::map<int, int> whences {
+    { 2, SEEK_SET },
+    { 4, SEEK_CUR },
+    { 8, SEEK_END },
+  };    
+
+  /* 
+   * Map valid input whence value onto C standard library value. 
+   * Invalid values are passed through and rejected by lseek(2) per its documentation. 
+   * (The caller always needs to check for errors anyway.)
+   */
+  auto p = whences.find(whence);
+  if( p != whences.end() ) whence = p.second;
+
+  return lseek(fd, offset, whence);
+}
+
+} // extern "C"
index ec007a92802d92d73ffe23548fd542402754dd59..e82e3d0fe0e22754fd097a60d712aaf1d89813e6 100644 (file)
           01 Lk-fd PIC 9(8) Usage COMP.
           01 Lk-offset Binary-Long.
           01 Lk-whence Binary-Long.
+             88 SEEK-SET VALUE 2.
+             88 SEEK-CUR VALUE 4.
+             88 SEEK-END VALUE 8.
         Procedure Division using
              By Value Lk-fd,
              By Value Lk-offset,
              By Value Lk-whence
              Returning Return-Value.
-          Call "lseek" using
+          Call "posix_lseek" using
              By Value Lk-fd,
              By Value Lk-offset,
              By Value Lk-whence
index 16dab3eebaac7b588f96bf210d7f99d4ccf9794e..5285d7ab90df28688530b5eaf4132ecae5b720ad 100644 (file)
              Returning Return-Value.
 
           Move Lk-pathname To Ws-pathname.
-          Inspect Ws-pathname 
-                  Replacing Trailing Space By Low-Value
+      D   Inspect Ws-pathname 
+      D           Replacing Trailing Space By Low-Value
 
           Inspect Backward Ws-pathname Replacing Leading Space, 
-      -      By Low-Value.
+                                                      By Low-Value.
           Call "unlink" using
              By Reference Ws-pathname,
              Returning Return-Value.
index acbc510ace2afaf14607bc579b7e13e84741c139..713bc59f8634de8992bb507f0898537657aa493d 100644 (file)
@@ -299,12 +299,14 @@ __gg__binary_to_string_ascii(char *result, int digits, __int128 value)
 
 bool
 __gg__binary_to_string_encoded( char *result,
-                                int digits,
+                                size_t digits,
                                 __int128 value,
                                 cbl_encoding_t encoding)
   {
-  charmap_t *charmap = __gg__get_charmap(encoding);
-  zero_char = charmap->mapped_character(ascii_0);
+  // A non-zero retval means the number was too big to fit into the desired
+  // number of digits.
+
+  zero_char = ascii_0;
 
   // Note that this routine does not terminate the generated string with a
   // NUL.  This routine is sometimes used to generate a NumericDisplay string
@@ -317,8 +319,6 @@ __gg__binary_to_string_encoded( char *result,
     value = -value;
     }
 
-  // A non-zero retval means the number was too big to fit into the desired
-  // number of digits:
   bool retval = !!(value / mask);
 
   // mask off the bottom digits to avoid garbage when value is too large
@@ -328,7 +328,13 @@ __gg__binary_to_string_encoded( char *result,
   combined.run = digits;
   combined.val128 = value;
   string_from_combined(combined);
-  memcpy(result, combined_string, digits);
+  size_t converted_bytes;
+  const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                           encoding,
+                                           combined_string,
+                                           digits,
+                                           &converted_bytes);
+  memcpy(result, converted, converted_bytes);
   return retval;
   }
 
@@ -482,8 +488,8 @@ __gg__binary_to_packed( unsigned char *result,
 extern "C"
 __int128
 __gg__numeric_display_to_binary(unsigned char *signp,
-                          const unsigned char *psz,
-                                int            n,
+                          const unsigned char *pdigits,
+                                int            ndigits,
                                 cbl_encoding_t encoding)
   {
   /*  This is specific to numeric display values.
@@ -507,12 +513,13 @@ __gg__numeric_display_to_binary(unsigned char *signp,
   /*  We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic,
       and so we build up a 128-bit result in three 64-bit pieces, and assemble
       them at the end.  */
+  size_t digit_index = 0;
+  cbl_char_t ch;
 
   charmap_t *charmap = __gg__get_charmap(encoding);
-  unsigned char zero  = charmap->mapped_character(ascii_0);
-  unsigned char minus = charmap->mapped_character(ascii_minus);
+  cbl_char_t minus = charmap->mapped_character(ascii_minus);
 
-  bool is_ebcdic = (zero == 0xF0);
+  bool is_ebcdic = charmap->is_like_ebcdic();
 
   static const uint8_t lookup[] =
     {
@@ -557,7 +564,7 @@ __gg__numeric_display_to_binary(unsigned char *signp,
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70
+    0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x70
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0
@@ -581,7 +588,7 @@ __gg__numeric_display_to_binary(unsigned char *signp,
   bool is_negative = false;
 
   // Pick up the original sign byte:
-  unsigned char sign_byte = *signp;
+  cbl_char_t sign_byte = charmap->getch(signp, (size_t)0);
 
   const unsigned char *mapper;
   if( is_ebcdic )
@@ -599,7 +606,7 @@ __gg__numeric_display_to_binary(unsigned char *signp,
     // forcing the zone to 0xF0.  Note that this is harmless if redundant, and
     // harmless as well if the data SIGN IS SEPARATE.  Whatever we do to this
     // byte will be undone at the end of the routine.
-    *signp |= 0xF0;
+    charmap->putch(sign_byte|0xF0, signp, (size_t)0);
     }
   else
     {
@@ -613,46 +620,49 @@ __gg__numeric_display_to_binary(unsigned char *signp,
       is_negative = true;
 
       // Make it a valid positive digit by turning the zone to 0x30
-      *signp &= 0x3F;
+      charmap->putch(sign_byte&0x3F, signp, (size_t)0);
       }
     }
 
   // Digits 1 through 18 come from the bottom:
-  if( n <= 18 )
+  if( ndigits <= 18 )
     {
-    count_bottom = n;
+    count_bottom = ndigits;
     count_middle = 0;
     count_top = 0;
     }
-  else if( n<= 36 )
+  else if( ndigits<= 36 )
     {
     count_bottom = 18;
-    count_middle = n - 18;
+    count_middle = ndigits - 18;
     count_top = 0;
     }
   else
     {
     count_bottom = 18;
     count_middle = 18;
-    count_top = n - 36;
+    count_top = ndigits - 36;
     }
 
-  if( n & 1 )
+  if( ndigits & 1 )
     {
     // We are dealing with an odd number of digits
     if( count_top )
       {
-      top = mapper[*psz++];
+      ch = charmap->getch(pdigits, &digit_index);
+      top = mapper[ch];
       count_top -= 1;
       }
     else if( count_middle )
       {
-      middle = mapper[*psz++];
+      ch = charmap->getch(pdigits, &digit_index);
+      middle = mapper[ch];
       count_middle -= 1;
       }
     else
       {
-      bottom = mapper[*psz++];
+      ch = charmap->getch(pdigits, &digit_index);
+      bottom = mapper[ch];
       count_bottom -= 1;
       }
     }
@@ -661,8 +671,10 @@ __gg__numeric_display_to_binary(unsigned char *signp,
 
   while( count_top )
     {
-    add_me  = mapper[*psz++] << 4;
-    add_me += mapper[*psz++];
+    ch = charmap->getch(pdigits, &digit_index);
+    add_me  = mapper[ch] << 4;
+    ch = charmap->getch(pdigits, &digit_index);
+    add_me += mapper[ch];
     top *= 100 ;
     top += lookup[add_me];
     count_top -= 2;
@@ -670,8 +682,10 @@ __gg__numeric_display_to_binary(unsigned char *signp,
 
   while( count_middle )
     {
-    add_me  = mapper[*psz++] << 4;
-    add_me += mapper[*psz++];
+    ch = charmap->getch(pdigits, &digit_index);
+    add_me  = mapper[ch] << 4;
+    ch = charmap->getch(pdigits, &digit_index);
+    add_me += mapper[ch];
     middle *= 100 ;
     middle += lookup[add_me];
     count_middle -= 2;
@@ -679,8 +693,10 @@ __gg__numeric_display_to_binary(unsigned char *signp,
 
   while( count_bottom )
     {
-    add_me  = mapper[*psz++] << 4;
-    add_me += mapper[*psz++];
+    ch = charmap->getch(pdigits, &digit_index);
+    add_me  = mapper[ch] << 4;
+    ch = charmap->getch(pdigits, &digit_index);
+    add_me += mapper[ch];
     bottom *= 100 ;
     bottom += lookup[add_me];
     count_bottom -= 2;
@@ -700,7 +716,7 @@ __gg__numeric_display_to_binary(unsigned char *signp,
     }
 
   // Replace the original sign byte:
-  *signp = sign_byte; // cppcheck-suppress redundantAssignment
+  charmap->putch(sign_byte, signp, (size_t)0);
   return retval;
   }
 
index 48c4874292a6a2b2a1ca57f704433f75b6491f59..0f30a9ff701c75824f37801465d08e4725ebff38 100644 (file)
@@ -36,7 +36,7 @@ bool __gg__binary_to_string_ascii(char *result,
                                   __int128 value);
 extern "C"
 bool __gg__binary_to_string_encoded(char *result,
-                                    int digits,
+                                    size_t digits, // Desired digits
                                     __int128 value,
                                     cbl_encoding_t encoding);
 
index 00fa986bda5af60839890dda1459b01cd5acec47..012f881d4cd38d59bdc44da1f0bab7ad672decad 100644 (file)
@@ -226,6 +226,9 @@ __gg__string_to_numeric_edited( char * const dest,
   // We need to expand the picture string.  We assume that the caller left
   // enough room in dest to take the expanded picture string.
 
+  // Note that we do not put on a nul terminator, so if you need one, it's
+  // your job to put it there.
+
   int dlength = expand_picture(dest, picture);
 
   // At the present time, I am taking a liberty. In principle, a 'V'