]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
cobol: Mainly extends compilation and execution in finternal-ebcdic.
authorRobert Dubner <rdubner@symas.com>
Thu, 6 Nov 2025 12:26:18 +0000 (07:26 -0500)
committerRobert Dubner <rdubner@symas.com>
Thu, 6 Nov 2025 13:57:54 +0000 (08:57 -0500)
We expanded our extended testing regime to execute many testcases in
EBCDIC mode as well as in ASCII. This exposed hundreds of problems in
both compilation (where conversions must be made between the ASCII
source code and the EBCDIC execution environment) and in run-time
functionality, where results from calls to system routines and internal
calculations that must be done in ASCII have to be converted to EBCDIC.

These changes also switch to using FIXED_WIDE_INT(128) instead of
REAL_VALUE_TYPE when initializing fixed-point COBOL variable types.
This provides for accurate initialization up to 37 digits, instead of
losing accuracy after 33 digits.

These changes also support the implementation of the COBOL DELETE FILE
(Format 2) statement.

These changes also introduce expanded support for specifying character
encodings, including support for locales.

co-authored-by: Robert Dubner <rdubner@symas.com>
co-authored-by: James K. Lowden <jklowden@cobolworx.com>

gcc/cobol/ChangeLog:

* Make-lang.in: Repair documentation generation.
* cdf.y: Changes to tokens.
* cobol1.cc (cobol_langhook_handle_option): Add comment.
* genapi.cc (function_pointer_from_name): Use data.original() for
function name.
(parser_initialize_programs): Likewise.
(cobol_compare): Make sure encodings of comparands are the same.
(move_tree): Change name of DEFAULT_SOURCE_ENCODING macro.
(parser_enter_program): Typo.
(psa_FldLiteralN): Break out dirty_to_binary() support routine.
(dirty_to_binary): Likewise.
(parser_alphabet): Rename 'alphabet' to 'collation_sequence'.
(parser_allocate): Change wsclear() to be uint32_t instead of char.
(parser_label_label): Formatting.
(parser_label_goto): Likewise.
(get_the_filename): Breakout get_the_filename(), which handles
encoding.
(parser_file_open): Likewise.
(set_up_delete_file_label): Implement DELETE FILE (Format 2).
(parser_file_delete_file): Likewise.
(parser_file_delete_on_exception): Likewise.
(parser_file_delete_not_exception): Likewise.
(parser_file_delete_end): Likewise.
(parser_call): Use data.original().
(parser_entry): Use data.original().
(mh_source_is_literalN): Convert from
sourceref.field->codeset.encoding.
(binary_initial_from_float128): Change to "binary_initial".
(binary_initial): Calculate in FIXED_WIDE_INT(128) instead of
REAL_VALUE_TYPE.
(digits_from_int128): New routine uses binary_initial.
(digits_from_float128): Removed.  Kept as comment for reference.
(initial_from_initial): Use binary_initial.
(actually_create_the_static_field): Use correct encoding.
(parser_symbol_add): Likewise.
* genapi.h (parser_file_delete_file): Implement FILE DELETE.
(parser_file_delete_on_exception): Implement FILE DELETE.
(parser_file_delete_not_exception): Implement FILE DELETE.
(parser_file_delete_end): Implement FILE DELETE.
* genmath.cc: Include charmaps.h.
* genutil.cc (get_literal_string):  Change name of
DEFAULT_SOURCE_ENCODING macro.
* parse.y: Token changes; numerous changes in support of encoding;
support for DELETE FILE.
* parse_ante.h (name_of): Use data.original().
(class prog_descr_t): Support of locales.
(current_options): Formatting.
(current_encoding):  Formatting.
(current_program_index): Formatting.
(current_section): Formatting.
(current_paragraph): Formatting.
(is_integer_literal): Use correct encoding.
(value_encoding_check): Handle encoding changes.
(alphabet_add): Likewise.
(data_division_ready): Likewise.
* scan.l: Use data.original().
* show_parse.h: Use correct encoding.
* symbols.cc (elementize): Likewise.
(symbol_elem_cmp): Handle locale.
(struct symbol_elem_t): Likewise.
(symbol_locale): Likewise.
(field_str): Change DEFAULT_SOURCE_ENCODING macro name.
(symbols_alphabet_set): Formatting.
(symbols_update): Modify consistency checks.
(symbol_locale_add): Locale support.
(cbl_locale_t::cbl_locale_t): Locale support.
(cbl_alphabet_t::cbl_alphabet_t): New structure.
(cbl_alphabet_t::reencode): Formatting.
(cbl_alphabet_t::assign): Change name of collation_sequence.
(cbl_alphabet_t::also): Likewise.
(new_literal_add): Anticipate the need for four-byte characters.
(guess_encoding): Eliminate.
(cbl_field_t::internalize): Refine conversion of data.initial to
specified encoding.
* symbols.h (enum symbol_type_t): Add SymLocale.
(struct cbl_field_data_t): Incorporate data.orig.
(struct cbl_field_t): Likewise.
(struct cbl_delete_file_t): New structure.
(struct cbl_label_t): Incorporate cbl_delete_file_t.
(struct cbl_locale_t): Support for locale.
(hex_decode): Comment.
(struct cbl_alphabet_t): Incorporate locale; change variable name
to collation_sequence.
(struct symbol_elem_t): Incorporate locale.
(cbl_locale_of): Likewise.
(cbl_alphabet_of): Likewise.
(symbol_locale_add): Likewise.
(wsclear): Type is now uint32_t instead of char.
* util.cc (symbol_type_str):  Incorporate locale.
(cbl_field_t::report_invalid_initial_value): Change test so that
pure PIC A() variables are limited to [a-zA-Z] and space.
(valid_move): Use DEFAULT_SOURCE_ENCODING macro.
(cobol_filename): Formatting.

libgcobol/ChangeLog:

* charmaps.cc (__gg__encoding_iconv_type): Eliminate trailing
'/' characters from encoding names.
(__gg__get_charmap): Switch to DEFAULT_SOURCE_ENCODING macro name.
* charmaps.h (DEFAULT_CHARMAP_SOURCE): Likewise.
(DEFAULT_SOURCE_ENCODING): Likewise.
(class charmap_t): Enhance constructor.
* encodings.h (valid_encoding): New routine.
* gcobolio.h (enum cblc_file_prior_op_t): Support DELETE FILE.
* gfileio.cc (get_filename): Likewise.
(__io__file_remove): Likewise.
(__gg__file_reopen): Likewise.
(__io__file_open): Likewise.
(gcobol_fileops): Likewise.
(__gg__file_delete): Likewise.
(__gg__file_remove): Likewise.
* intrinsic.cc (get_all_time):  Switch to DEFAULT_SOURCE_ENCODING
macro name.
(ftime_replace): Support ASCII/EBCDIC encoding.
(__gg__current_date): Likewise.
(__gg__max): Likewise.
(__gg__lower_case): Likewise.
(numval): Likewise.
(numval_c): Likewise.
(__gg__upper_case): Likewise.
(__gg__when_compiled): Likewise.
(gets_int): Likewise.
(gets_nanoseconds): Likewise.
(fill_cobol_tm): Likewise.
(floating_format_tester): Likewise.
(__gg__numval_f): Likewise.
(__gg__test_numval_f): Likewise.
(iscasematch): Likewise.
(strcasestr): Likewise.
(strcaselaststr): Likewise.
(__gg__substitute): Likewise.
(__gg__locale_compare): Support for locale.
(__gg__locale_date): Likewise.
(__gg__locale_time): Likewise.
(__gg__locale_time_from_seconds): Likewise.
* libgcobol.cc (class ec_status_t): Support for encoding.
(int128_to_field): Likewise.
(__gg__dirty_to_float): Likewise.
(format_for_display_internal): Likewise.
(get_float128): Likewise.
(compare_field_class): Likewise.
(__gg__compare_2): Likewise.
(init_var_both): Likewise.
(__gg__move): Likewise.
(display_both): Likewise.
(is_numeric_display_numeric): Likewise.
(accept_envar): Likewise.
(__gg__get_argv): Likewise.
(__gg__unstring): Likewise.
(__gg__check_fatal_exception): Likewise.
(__gg__adjust_encoding): Likewise.
(__gg__func_exception_location): Likewise.
(__gg__func_exception_statement): Likewise.
(__gg__func_exception_status): Likewise.
(__gg__func_exception_file): Likewise.
(__gg__just_mangle_name): Likewise.
(__gg__function_handle_from_name): Likewise.
(get_the_byte): Likewise.
(__gg__module_name): Likewise.
(__gg__accept_arg_value): Likewise.
* xmlparse.cc (fatalError): Formatting.
(setDocumentLocator): Formatting.
(xmlchar_of): Formatting.
(xmlParserErrors_str): Formatting.

22 files changed:
gcc/cobol/Make-lang.in
gcc/cobol/cdf.y
gcc/cobol/cobol1.cc
gcc/cobol/genapi.cc
gcc/cobol/genapi.h
gcc/cobol/genmath.cc
gcc/cobol/genutil.cc
gcc/cobol/parse.y
gcc/cobol/parse_ante.h
gcc/cobol/scan.l
gcc/cobol/show_parse.h
gcc/cobol/symbols.cc
gcc/cobol/symbols.h
gcc/cobol/util.cc
libgcobol/charmaps.cc
libgcobol/charmaps.h
libgcobol/encodings.h
libgcobol/gcobolio.h
libgcobol/gfileio.cc
libgcobol/intrinsic.cc
libgcobol/libgcobol.cc
libgcobol/xmlparse.cc

index ed6b588fe729cab946e314e20ede5ab5dc5cf2fe..9f28752f165ee6cd9d8e01bef05162cac3b02c45 100644 (file)
@@ -330,7 +330,7 @@ cobol.srcpdf: gcobol.pdf gcobol-io.pdf
        ln $^ $(srcdir)/cobol/
 
 gcobol.pdf: $(srcdir)/cobol/gcobol.1
-       groff -mdoc -T pdf  $^ > $@~
+       groff -mdoc -t -T pdf  $^ > $@~
        @mv $@~ $@
 gcobol-io.pdf: $(srcdir)/cobol/gcobol.3
        groff -mdoc -T pdf  $^ > $@~
index f72ed77c964155f757181043a5d826b3f32bd550..2d3f8192bc61b6f2ce8be6047586a63628349c77 100644 (file)
@@ -244,21 +244,21 @@ apply_cdf_turn( const exception_turn_t& turn ) {
 %type  <boolean>            DEFINED
 %token OTHER 699  PARAMETER_kw 369    "PARAMETER"
 %token OFF 688  OVERRIDE 370
-%token THRU 949
-%token TRUE_kw 814    "True"
+%token THRU 950
+%token TRUE_kw 815    "True"
 
 %token CALL_COBOL 393    "CALL"
 %token CALL_VERBATIM 394    "CALL (as C)"
 
-%token TURN 816  CHECKING 497  LOCATION 650  ON 690  WITH 843
+%token TURN 817  CHECKING 497  LOCATION 650  ON 690  WITH 844
 
-%left OR 950
-%left AND 951
-%right NOT 952
-%left '<'  '>'  '='  NE 953  LE 954  GE 955
+%left OR 951
+%left AND 952
+%right NOT 953
+%left '<'  '>'  '='  NE 954  LE 955  GE 956
 %left '-'  '+'
 %left '*'  '/'
-%right NEG 957
+%right NEG 958
 
 %define api.prefix {ydf}
 %define api.token.prefix{YDF_}
index 3146da5789988b45694188c9218e4c208ac81772..77c457d496c66b83ec505d9206efde4c33d366e8 100644 (file)
@@ -365,6 +365,7 @@ cobol_langhook_handle_option (size_t scode,
             return true;
 
         case OPT_fdefaultbyte:
+            // cobol_default_byte is an unsigned ing
             wsclear(cobol_default_byte);
             return true;
 
index 9d30dde96ebcc63406d79b9bbd40632bb5d34aa4..8c5f28ac07d86cd8b20f3b0783a8fbf8af3e290d 100644 (file)
@@ -863,8 +863,12 @@ function_pointer_from_name(const cbl_refer_t &name,
                        NULL); // And, hence, no types
 
     // Fetch the FUNCTION_DECL for that FUNCTION_TYPE
-    tree function_decl = gg_build_fn_decl(name.field->data.initial,
+    char *tname = static_cast<char *>(xmalloc(name.field->data.capacity+1));
+    memcpy(tname, name.field->data.original(), name.field->data.capacity);
+    tname[name.field->data.capacity] = '\0';
+    tree function_decl = gg_build_fn_decl(tname,
                                           fndecl_type);
+    free(tname);
     // Take the address of the function decl:
     tree address_of_function = gg_get_address_of(function_decl);
     gg_assign(function_pointer, address_of_function);
@@ -877,11 +881,11 @@ function_pointer_from_name(const cbl_refer_t &name,
       gg_assign(function_pointer,
                 gg_cast(build_pointer_type(function_type),
                         gg_call_expr( VOID_P,
-                                  "__gg__function_handle_from_literal",
-                                  build_int_cst_type(INT,
-                                    current_function->our_symbol_table_index),
-                                  gg_string_literal(name.field->data.initial),
-                                  NULL_TREE)));
+                              "__gg__function_handle_from_literal",
+                              build_int_cst_type(INT,
+                                current_function->our_symbol_table_index),
+                              gg_string_literal(name.field->data.original()),
+                              NULL_TREE)));
       }
     else
       {
@@ -919,7 +923,7 @@ parser_initialize_programs( size_t nprogs,
       if( progs[i].field->type == FldLiteralA )
         {
         SHOW_PARSE_TEXT("\"")
-        SHOW_PARSE_TEXT(progs[i].field->data.initial)
+        SHOW_PARSE_TEXT(progs[i].field->data.original())
         SHOW_PARSE_TEXT("\"")
         }
       else
@@ -2246,21 +2250,19 @@ cobol_compare(  tree return_int,
               {
               // Comparing a FldLiteralN to an alphanumeric
 
-              // CONVERSION ALERT.  lefty->field->data.initial is an ASCII
-              // string.  We want to convert it to the same encoding as the
-              // right side.
-
-              cbl_encoding_t enc_left = DEFAULT_CHARMAP_SOURCE;
-              cbl_encoding_t enc_right =
-                  static_cast<cbl_encoding_t>(righty->field->codeset.encoding);
-
+              // This next conversion may be overkill.  But just in case
+              // the encodings of the two variables are different, we are
+              // going to convert left-side text to the right-side encoding
+              cbl_encoding_t enc_left  = lefty->field->codeset.encoding;
+              cbl_encoding_t enc_right = righty->field->codeset.encoding;
               size_t outlength;
-              char *converted = __gg__iconverter(enc_left,
-                                           enc_right,
-                                           lefty->field->data.initial,
-                                           strlen(lefty->field->data.initial)+1,
-                                           &outlength );
-
+              size_t inlength = strlen(lefty->field->data.initial);
+              char *converted = __gg__iconverter(
+                                         enc_left,
+                                         enc_right,
+                                         lefty->field->data.initial,
+                                         inlength,
+                                         &outlength );
               gg_assign(  return_int, gg_call_expr(
                           INT,
                           "__gg__literaln_alpha_compare",
@@ -2458,7 +2460,7 @@ move_tree(  cbl_field_t  *dest,
       gg_call(VOID,
               "__gg__string_to_alpha_edited",
               location,
-              build_int_cst_type(INT, DEFAULT_CHARMAP_SOURCE),
+              build_int_cst_type(INT, DEFAULT_SOURCE_ENCODING),
               psz_source,
               min_length,
               member(dest->var_decl_node, "picture"),
@@ -3956,7 +3958,7 @@ parser_enter_program( const char *funcname_,
 
   if( strcmp(funcname_, "main") == 0 && this_module_has_main )
     {
-    // setting 'retval' to 1 let's the caller know that we are being told
+    // Setting 'retval' to 1 lets the caller know that we are being told
     // both to synthesize a main() entry point to duplicate GCC's default
     // behavior, and to create an explicit entry point named "main".  This will
     // eventually result in a link error (because of the duplicated entry
@@ -4164,178 +4166,197 @@ parser_init_list()
   gg_call(VOID,
           "__gg__variables_to_init",
           gg_get_address_of(array),
-          wsclear() ? gg_string_literal(wsclear()) : null_pointer_node,
+          wsclear() ? build_string_literal(1, (const char *)wsclear())
+                    : null_pointer_node,
           NULL_TREE);
   }
 
-static void
-psa_FldLiteralN(struct cbl_field_t *field )
+static
+FIXED_WIDE_INT(128)
+dirty_to_binary(const char  *instring,
+                uint32_t    &capacity,
+                uint32_t    &digits,
+                int32_t     &rdigits,
+                uint64_t    &attr)
   {
-  Analyze();
-  SHOW_PARSE
-    {
-    SHOW_PARSE_HEADER
-    SHOW_PARSE_FIELD(" ", field)
-    SHOW_PARSE_END
-    }
-  // We are constructing a completely static constant structure, based on the
-  // text string in .initial
-
-  CHECK_FIELD(field);
+  digits = 0;
+  rdigits = 0;
+  attr = 0;
 
   FIXED_WIDE_INT(128) value = 0;
 
-  do
+  // We need to convert data.initial to an FIXED_WIDE_INT(128) value
+  const char *p = instring;
+  int sign = 1;
+  if( *p == '-' )
     {
-    // This is a false do{}while, to isolate the variables:
+    attr |= signable_e;
+    sign = -1;
+    p += 1;
+    }
+  else if( *p == '+' )
+    {
+    // We set it signable so that the instruction DISPLAY +1
+    // actually outputs "+1"
+    attr |= signable_e;
+    p += 1;
+    }
 
-    // We need to convert data.initial to an FIXED_WIDE_INT(128) value
-    char *p = const_cast<char *>(field->data.initial);
-    int sign = 1;
-    if( *p == '-' )
-      {
-      field->attr |= signable_e;
-      sign = -1;
-      p += 1;
-      }
-    else if( *p == '+' )
-      {
-      // We set it signable so that the instruction DISPLAY +1
-      // actually outputs "+1"
-      field->attr |= signable_e;
-      p += 1;
-      }
+  //  We need to be able to handle
+  //  123
+  //  123.456
+  //  123E<exp>
+  //  123.456E<exp>
+  //  where <exp> can be N, +N and -N
+  //
+  //  Oh, yeah, and we're talking handling up to 32 digits, or more, so using
+  //  library routines is off the table.
+
+  int rdigit_delta = 0;
+  int exponent = 0;
+  const char *exp = strchr(p, 'E');
+  if( !exp )
+    {
+    exp = strchr(p, 'e');
+    }
+  if(exp)
+    {
+    exponent = atoi(exp+1);
+    }
 
-    //  We need to be able to handle
-    //  123
-    //  123.456
-    //  123E<exp>
-    //  123.456E<exp>
-    //  where <exp> can be N, +N and -N
-    //
-    //  Oh, yeah, and we're talking handling up to 32 digits, or more, so using
-    //  library routines is off the table.
+  // We can now calculate the value, and the number of digits and rdigits.
 
-    int digits = 0;
-    int rdigits = 0;
-    int rdigit_delta = 0;
-    int exponent = 0;
+  // We count up leading zeroes as part of the attr->digits calculation.
+  // It turns out that certain comparisons need to know the number of digits,
+  // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true.  So,
+  // we need to count up leading zeroes.
 
-    const char *exp = strchr(p, 'E');
-    if( !exp )
+  for(;;)
+    {
+    char ch = *p++;
+    if( ch == symbol_decimal_point() )
       {
-      exp = strchr(p, 'e');
+      rdigit_delta = 1;
+      continue;
       }
-    if(exp)
+    if( ch < '0' || ch > '9' )
       {
-      exponent = atoi(exp+1);
+      break;
       }
+    digits += 1;
+    rdigits += rdigit_delta;
+    value *= 10;
+    value += ch - '0';
+    }
 
-    // We can now calculate the value, and the number of digits and rdigits.
-
-    // We count up leading zeroes as part of the attr->digits calculation.
-    // It turns out that certain comparisons need to know the number of digits,
-    // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true.  So,
-    // we need to count up leading zeroes.
-
-    for(;;)
+  if( exponent < 0 )
+    {
+    rdigits += -exponent;
+    }
+  else
+    {
+    while(exponent--)
       {
-      char ch = *p++;
-      if( ch == symbol_decimal_point() )
+      if(rdigits)
         {
-        rdigit_delta = 1;
-        continue;
+        rdigits -= 1;
         }
-      if( ch < '0' || ch > '9' )
+      else
         {
-        break;
+        digits += 1;
+        value *= 10;
         }
-      digits += 1;
-      rdigits += rdigit_delta;
-      value *= 10;
-      value += ch - '0';
       }
+    }
 
-    if( exponent < 0 )
-      {
-      rdigits += -exponent;
-      }
-    else
-      {
-      while(exponent--)
-        {
-        if(rdigits)
-          {
-          rdigits -= 1;
-          }
-        else
-          {
-          digits += 1;
-          value *= 10;
-          }
-        }
-      }
+  if( (int32_t)digits < rdigits )
+    {
+    digits = rdigits;
+    }
 
-    if(digits < rdigits)
-      {
-      digits = rdigits;
-      }
-    field->data.digits = digits;
-    field->data.rdigits = rdigits;
+  // We now need to calculate the capacity.
 
-    // We now need to calculate the capacity.
+  unsigned int min_prec = wi::min_precision(value, UNSIGNED);
+  if( min_prec > 64 )
+    {
+    // Bytes 15 through 8 are non-zero
+    capacity = 16;
+    }
+  else if( min_prec > 32 )
+    {
+    // Bytes 7 through 4 are non-zero
+    capacity = 8;
+    }
+  else if( min_prec > 16 )
+    {
+    // Bytes 3 and 2
+    capacity = 4;
+    }
+  else if( min_prec > 8 )
+    {
+    // Byte 1 is non-zero
+    capacity = 2;
+    }
+  else
+    {
+    // The value is zero through 0xFF
+    capacity = 1;
+    }
 
-    unsigned int min_prec = wi::min_precision(value, UNSIGNED);
-    int capacity;
-    if( min_prec > 64 )
-      {
-      // Bytes 15 through 8 are non-zero
-      capacity = 16;
-      }
-    else if( min_prec > 32 )
-      {
-      // Bytes 7 through 4 are non-zero
-      capacity = 8;
-      }
-    else if( min_prec > 16 )
-      {
-      // Bytes 3 and 2
-      capacity = 4;
-      }
-    else if( min_prec > 8 )
+  value *= sign;
+
+  // One last adjustment.  The number is signable, so the binary value
+  // is going to be treated as twos complement.  That means that the highest
+  // bit has to be 1 for negative signable numbers, and 0 for positive.  If
+  // necessary, adjust capacity up by one byte so that the variable fits:
+
+  if( capacity < 16 && (attr & signable_e) )
+    {
+    FIXED_WIDE_INT(128) mask
+      = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1);
+    if( wi::neg_p (value) && (value & mask) == 0 )
       {
-      // Byte 1 is non-zero
-      capacity = 2;
+      capacity *= 2;
       }
-    else
+    else if( !wi::neg_p (value) && (value & mask) != 0 )
       {
-      // The value is zero through 0xFF
-      capacity = 1;
+      capacity *= 2;
       }
+    }
 
-    value *= sign;
+  return value;
+  }
 
-    // One last adjustment.  The number is signable, so the binary value
-    // is going to be treated as twos complement.  That means that the highest
-    // bit has to be 1 for negative signable numbers, and 0 for positive.  If
-    // necessary, adjust capacity up by one byte so that the variable fits:
+static void
+psa_FldLiteralN(struct cbl_field_t *field )
+  {
+  Analyze();
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_FIELD(" ", field)
+    SHOW_PARSE_END
+    }
+  // We are constructing a completely static constant structure, based on the
+  // text string in .initial
 
-    if( capacity < 16 && (field->attr & signable_e) )
-      {
-      FIXED_WIDE_INT(128) mask
-        = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1);
-      if( wi::neg_p (value) && (value & mask) == 0 )
-        {
-        capacity *= 2;
-        }
-      else if( !wi::neg_p (value) && (value & mask) != 0 )
-        {
-        capacity *= 2;
-        }
-      }
-    field->data.capacity = capacity;
+  CHECK_FIELD(field);
 
-    }while(0);
+  uint32_t capacity;
+  uint32_t digits;
+  int32_t  rdigits;
+  uint64_t attr;
+  FIXED_WIDE_INT(128) value = dirty_to_binary(field->data.original(),
+                                              capacity,
+                                              digits,
+                                              rdigits,
+                                              attr);
+  // This is a rare occurrence of a parser_xxx call changing the entry
+  // in the symbol table.
+  field->data.capacity = capacity;
+  field->data.digits   = digits;
+  field->data.rdigits  = rdigits;
+  field->attr         |= attr;
 
   char base_name[257];
   char id_string[32] = "";
@@ -5136,9 +5157,9 @@ parser_alphabet( const cbl_alphabet_t& alphabet )
         // character i has the ordinal alphabet[i]
         unsigned char ch = i;
 
-        ach[ch] = (alphabet.alphabet[i]);
+        ach[ch] = (alphabet.collation_sequence[i]);
         gg_assign(  gg_array_value(table256, ch),
-                    build_int_cst_type(UCHAR, (alphabet.alphabet[i])) );
+                    build_int_cst_type(UCHAR, (alphabet.collation_sequence[i])) );
         }
 
       unsigned int low_char  = alphabet.low_char;
@@ -6811,7 +6832,7 @@ parser_allocate(cbl_refer_t size_or_based,
   cbl_field_t *f_working = current_options().initial_working();
   cbl_field_t *f_local   = current_options().initial_local();
 
-  int default_byte = wsclear() ? *wsclear() : -1;
+  unsigned int default_byte = wsclear() ? *wsclear() : (uint32_t)(-1);
 
   gg_call(VOID,
           "__gg__allocate",
@@ -8201,7 +8222,7 @@ parser_label_label(struct cbl_label_t *label)
     }
 
   CHECK_LABEL(label);
-  
+
 #if 1
   // At the present time, label_verify.lay is returning true, so I edited
   // out the if( !... ) to quiet cppcheck
@@ -8252,7 +8273,7 @@ parser_label_goto(struct cbl_label_t *label)
     }
 
   CHECK_LABEL(label);
-  
+
   label_verify.go_to(label);
 
   label_verify.go_to(label);
@@ -9933,6 +9954,44 @@ parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char )
     }
   }
 
+static
+tree get_the_filename(bool &quoted_name, const cbl_file_t *file)
+  {
+  // The cbl_file_t has a cbl_field_t *filename.  This can be a FldAlphanumeric.
+  // The runtime has a (char *)filename, so we need to
+  // do a runtime conversion.
+
+  tree psz;   // This is going to be either the name of the file, or the
+  // possible run-time environment variable that will contain
+  // the name of the file.
+
+  cbl_field_t *field_of_name = symbol_field_forward(file->filename);
+  quoted_name = false;
+  if( field_of_name->type == FldForward )
+    {
+    // The target of ASSIGN TO was unquoted, but didn't resolve to a
+    // cbl_field_t.  This means that the name of the field is an
+    // environment variable that will hold the file name
+    psz = gg_define_char_star();
+    gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name)));
+    }
+  else
+    {
+    // The name is coming from a presumably FldAlphaNumeric variable
+    psz = get_string_from(field_of_name);
+    gg_call( CHAR_P,
+             "__gg__convert_encoding",
+             psz,
+             build_int_cst_type(INT,
+                                field_of_name->codeset.encoding),
+             build_int_cst_type(INT,
+                                DEFAULT_SOURCE_ENCODING),
+             NULL_TREE);
+    quoted_name = true;
+    }
+  return psz;
+  }
+
 void
 parser_file_open( struct cbl_file_t *file, int mode_char )
   {
@@ -9985,45 +10044,15 @@ parser_file_open( struct cbl_file_t *file, int mode_char )
     TRACE1_END
     }
 
-  // The cbl_file_t has a cbl_field_t *filename.  This can be a FldAlphanumeric.
-  // The runtime has a (char *)filename, so we need to
-  // do a runtime conversion.
-
-  tree psz;   // This is going to be either the name of the file, or the
-  // possible run-time environment variable that will contain
-  // the name of the file.
-
-  cbl_field_t *field_of_name = symbol_field_forward(file->filename);
-  bool quoted_name = false;
-  if( field_of_name->type == FldForward )
-    {
-    // The target of ASSIGN TO was unquoted, but didn't resolve to a
-    // cbl_field_t.  This means that the name of the field is an
-    // environment variable that will hold the file name
-    psz = gg_define_char_star();
-    gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name)));
-    }
-  else
-    {
-    // The name is coming from a presumably FldAlphaNumeric variable
-    psz = get_string_from(field_of_name);
-    gg_call( CHAR_P,
-             "__gg__convert_encoding",
-             psz,
-             build_int_cst_type(INT,
-                                field_of_name->codeset.encoding),
-             build_int_cst_type(INT,
-                                DEFAULT_CHARMAP_SOURCE),
-             NULL_TREE);
-    quoted_name = true;
-    }
+  bool quoted_name;
+  tree pszFilename = get_the_filename(quoted_name, file);
 
   sv_is_i_o = true;
   store_location_stuff("OPEN");
   gg_call(VOID,
           "__gg__file_open",
           gg_get_address_of(file->var_decl_node),
-          psz,
+          pszFilename,
           build_int_cst_type(INT, mode_char),
           quoted_name ? integer_one_node : integer_zero_node,
           NULL_TREE);
@@ -10384,6 +10413,121 @@ parser_file_delete( struct cbl_file_t *file, bool /*sequentially*/ )
     }
   }
 
+static void
+set_up_delete_file_label(cbl_label_t *delete_file_label)
+  {
+  if( delete_file_label )
+    {
+    if( !delete_file_label->structs.delete_file )
+      {
+      delete_file_label->structs.delete_file
+        = static_cast<cbl_delete_file_t *>
+                                  (xmalloc(sizeof(struct cbl_delete_file_t)));
+      // Set up the address pairs for this clause
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->over.go_to,
+                  &delete_file_label->structs.delete_file->over.label);
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->exception.go_to,
+                  &delete_file_label->structs.delete_file->exception.label);
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->no_exception.go_to,
+                  &delete_file_label->structs.delete_file->no_exception.label);
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->bottom.go_to,
+                  &delete_file_label->structs.delete_file->bottom.label);
+      }
+    }
+  }
+
+void
+parser_file_delete_file( cbl_label_t *name,
+                         std::vector<cbl_file_t*> filenames )
+  {
+  // This removes a file from the file system.  It is distinct from the
+  // FILE DELETE statement, which deletes a record from a file.
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    for(size_t i=0; i<filenames.size(); i++)
+      {
+      SHOW_PARSE_INDENT
+      SHOW_PARSE_TEXT(filenames[i]->name)
+      }
+    SHOW_PARSE_END
+    }
+  set_up_delete_file_label(name);
+  tree there_was_an_error = gg_define_int(0);
+  for(size_t i=0; i<filenames.size(); i++)
+    {
+    bool quoted_name;
+    tree pszFilename = get_the_filename(quoted_name, filenames[i]);
+    gg_assign(there_was_an_error,
+              gg_bitwise_or(there_was_an_error,
+                            gg_call_expr(
+                            INT,
+                            "__gg__file_remove",
+                            gg_get_address_of(filenames[i]->var_decl_node),
+                            pszFilename,
+                            quoted_name ? integer_one_node : integer_zero_node,
+                            NULL_TREE)));
+    set_user_status(filenames[i]);
+    }
+  IF( there_was_an_error, eq_op, integer_zero_node )
+    {
+    // There was no error detected.
+    gg_append_statement(name->structs.delete_file->no_exception.go_to);
+    }
+  ELSE
+    {
+    // There was an error detected.
+    gg_append_statement(name->structs.delete_file->exception.go_to);
+    }
+  }
+
+void
+parser_file_delete_on_exception( cbl_label_t *name )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    SHOW_PARSE_END
+    }
+  gg_append_statement(name->structs.delete_file->bottom.go_to);
+  gg_append_statement(name->structs.delete_file->exception.label);
+  }
+
+void
+parser_file_delete_not_exception( cbl_label_t *name )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    SHOW_PARSE_END
+    }
+  gg_append_statement(name->structs.delete_file->bottom.go_to);
+  gg_append_statement(name->structs.delete_file->no_exception.label);
+  }
+
+void
+parser_file_delete_end( cbl_label_t *name )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    SHOW_PARSE_END
+    }
+  gg_append_statement(name->structs.delete_file->bottom.label);
+  }
+
 void
 parser_file_rewrite(cbl_file_t *file,
                     cbl_field_t *record_area,
@@ -13639,7 +13783,7 @@ parser_call(   cbl_refer_t name,
     create_and_call(narg,
                     args,
                     NULL_TREE,
-                    name.field->data.initial,
+                    name.field->data.original(),
                     returned_value_type,
                     returned,
                     not_except);
@@ -13747,7 +13891,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args )
     {
     SHOW_PARSE_HEADER
     SHOW_PARSE_TEXT(" ")
-    SHOW_PARSE_TEXT(name->data.initial)
+    SHOW_PARSE_TEXT(name->data.original())
     SHOW_PARSE_END
     }
 
@@ -13756,7 +13900,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args )
 
   // Get the name of the ENTRY point.
   // cppcheck-suppress nullPointerRedundantCheck
-  char *psz = cobol_name_mangler(name->data.initial);
+  char *psz = cobol_name_mangler(name->data.original());
 
   // Create a goto/label pair.  The label will be set up here; the goto will
   // be used when we re-enter the containing function:
@@ -14642,13 +14786,12 @@ mh_source_is_literalN(cbl_refer_t &destref,
           SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move")
           }
 
-        // We know that the encoding of the literal::initial is in ASCII
-
         // We need the data sent to __gg__psz_to_alpha_move to be in the
         // encoding of the destination
 
         size_t charsout;
-        const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
+        const char *converted = __gg__iconverter(
+                                         sourceref.field->codeset.encoding,
                                          destref.field->codeset.encoding,
                                          sourceref.field->data.initial,
                                          strlen(sourceref.field->data.initial),
@@ -16086,54 +16229,50 @@ real_powi10 (uint32_t x)
   return pow10;
 }
 
+static
 char *
-binary_initial_from_float128(cbl_field_t *field, int rdigits,
-                             REAL_VALUE_TYPE value)
+binary_initial(cbl_field_t *field)
   {
   // This routine returns an xmalloced buffer designed to replace the
   // data.initial member of the incoming field
   char *retval = NULL;
 
-  // We need to adjust value so that it has no decimal places
-  if( rdigits )
+  uint32_t capacity;
+  uint32_t ddigits;
+  int32_t  drdigits;
+  uint64_t attr;
+  FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
+                                                 capacity,
+                                                 ddigits,
+                                                 drdigits,
+                                                 attr);
+  int scaled_rdigits = get_scaled_rdigits(field);
+
+  int i = field->data.rdigits;
+  while( i<0 )
     {
-      REAL_VALUE_TYPE pow10 = real_powi10 (rdigits);
-      real_arithmetic (&value, MULT_EXPR, &value, &pow10);
-      real_convert (&value, TYPE_MODE (float128_type_node), &value);
+    value128 = value128/10;
+    i += 1;
     }
-  // We need to make sure that the resulting string will fit into
-  // a number with 'digits' digits
 
-  // Keep in mind that pure binary types, like BINARY-CHAR, have no digits
-  if( field->data.digits )
+  // We take the digits of value128, and put them into ach.  We line up
+  // the rdigits, and we truncate the string after desired_digits
+  while(drdigits < scaled_rdigits)
     {
-      REAL_VALUE_TYPE pow10 = real_powi10 (field->data.digits);
-      mpfr_t m0, m1;
-
-      mpfr_inits2 (REAL_MODE_FORMAT (TYPE_MODE (float128_type_node))->p,
-                   m0, m1, NULL);
-      mpfr_from_real (m0, &value, MPFR_RNDN);
-      mpfr_from_real (m1, &pow10, MPFR_RNDN);
-      mpfr_clear_flags ();
-      mpfr_fmod (m0, m0, m1, MPFR_RNDN);
-      real_from_mpfr (&value, m0,
-                      REAL_MODE_FORMAT (TYPE_MODE (float128_type_node)),
-                      MPFR_RNDN);
-      real_convert (&value, TYPE_MODE (float128_type_node), &value);
-      mpfr_clears (m0, m1, NULL);
+    value128 *= 10;
+    drdigits += 1;
+    }
+  while(drdigits > scaled_rdigits)
+    {
+    value128 = value128 / 10;
+    drdigits -= 1;
     }
-
-  real_roundeven (&value, TYPE_MODE (float128_type_node), &value);
-
-  bool fail = false;
-  FIXED_WIDE_INT(128) i
-    = FIXED_WIDE_INT(128)::from (real_to_integer (&value, &fail, 128), SIGNED);
 
   retval = static_cast<char *>(xmalloc(field->data.capacity));
   gcc_assert(retval);
   switch(field->data.capacity)
     {
-      tree type;
+    tree type;
     case 1:
     case 2:
     case 4:
@@ -16141,12 +16280,12 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits,
     case 16:
       type = build_nonstandard_integer_type ( field->data.capacity
                                               * BITS_PER_UNIT, 0);
-      native_encode_wide_int (type, i, PTRCAST(unsigned char, retval),
+      native_encode_wide_int (type, value128, PTRCAST(unsigned char, retval),
                               field->data.capacity);
       break;
     default:
       fprintf(stderr,
-              "Trouble in binary_initial_from_float128 at %s() %s:%d\n",
+              "Trouble in binary_initial at %s() %s:%d\n",
               __func__,
               __FILE__,
               __LINE__);
@@ -16157,6 +16296,60 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits,
   return retval;
   }
 
+static void
+digits_from_int128( char                *ach,
+                    cbl_field_t         *field,
+                    uint32_t             desired_digits,
+                    FIXED_WIDE_INT(128)  value128, // cppcheck-suppress unknownMacro
+                    int32_t              rdigits)
+  {
+  if( value128 < 0 )
+    {
+    value128 = -value128;
+    }
+
+  // 'rdigits' are the number of rdigits in value128.
+
+  int scaled_rdigits = get_scaled_rdigits(field);
+
+  int i = field->data.rdigits;
+  while( i<0 )
+    {
+    value128 = value128/10;
+    i += 1;
+    }
+
+  // We take the digits of value128, and put them into ach.  We line up
+  // the rdigits, and we truncate the string after desired_digits
+  while(rdigits < scaled_rdigits)
+    {
+    value128 *= 10;
+    rdigits += 1;
+    }
+  while(rdigits > scaled_rdigits)
+    {
+    value128 = value128 / 10;
+    rdigits -= 1;
+    }
+  char conv[128];
+  print_dec (value128, conv, SIGNED);
+  size_t len = strlen(conv);
+
+  if( len<desired_digits )
+    {
+    memset(ach, ascii_0, desired_digits - len);
+    strcpy(ach+desired_digits - len, conv);
+    }
+  else
+    {
+    strcpy(ach, conv + len-desired_digits);
+    }
+  }
+
+#if 0
+// This routine was replaced with digits_from_int1289.  However, I am choosing
+// to keep it around for a while, because it is a master class in manipulating
+// REAL_VALUE_TYPE and FIXED_WIDE_INT
 
 static void
 digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits, REAL_VALUE_TYPE value)
@@ -16194,8 +16387,6 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits
   // We convert it to a integer string of digits:
   print_dec (i, ach, SIGNED);
 
-  //fprintf(stderr, "digits_from_float128() %s %f %s ", field->name, (double)value, ach);
-
   gcc_assert( strlen(ach) <= field->data.digits );
   if( strlen(ach) < width )
     {
@@ -16203,6 +16394,7 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits
     }
   strcpy(retval + (width-strlen(ach)), ach);
   }
+#endif
 
 static char *
 initial_from_initial(cbl_field_t *field)
@@ -16211,10 +16403,9 @@ initial_from_initial(cbl_field_t *field)
   // This routine returns an xmalloced buffer that is intended to replace the
   // data.initial member of the incoming field.
 
-  //fprintf(stderr, "initial_from_initial %s\n", field->name);
+  //fprintf(stderr, " %s\n", field->name);
 
   char *retval = NULL;
-  int rdigits;
 
   // Let's handle the possibility of a figurative constant
   cbl_figconst_t figconst = cbl_figconst_of(field->data.initial);
@@ -16253,6 +16444,8 @@ initial_from_initial(cbl_field_t *field)
   if( field->data.etc_type == cbl_field_data_t::value_e )
     value = TREE_REAL_CST (field->data.value_of ());
 
+#if 0
+  int rdigits;
   // There is always the infuriating possibility of a P-scaled number
   if( field->attr & scaled_e )
     {
@@ -16288,17 +16481,18 @@ initial_from_initial(cbl_field_t *field)
     // Not P-scaled
     rdigits = field->data.rdigits;
     }
+#endif
 
   switch(field->type)
     {
     case FldNumericBin5:
     case FldIndex:
-      retval = binary_initial_from_float128(field, rdigits, value);
+      retval = binary_initial(field);
       break;
 
     case FldNumericBinary:
       {
-      retval = binary_initial_from_float128(field, rdigits, value);
+      retval = binary_initial(field);
       size_t left = 0;
       size_t right = field->data.capacity - 1;
       while(left < right)
@@ -16328,7 +16522,17 @@ initial_from_initial(cbl_field_t *field)
         negative = false;
         }
 
-      digits_from_float128(ach, field, field->data.digits, rdigits, value);
+      // Convert the data.initial to a __int128
+      uint32_t capacity;
+      uint32_t ddigits;
+      int32_t  drdigits;
+      uint64_t attr;
+      FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
+                                                     capacity,
+                                                     ddigits,
+                                                     drdigits,
+                                                     attr);
+      digits_from_int128(ach, field, field->data.digits, value128, drdigits);
 
       const char *digits = ach;
       if(    (field->attr & signable_e)
@@ -16404,7 +16608,16 @@ initial_from_initial(cbl_field_t *field)
       size_t ndigits =   (field->attr & separate_e)
                        ? field->data.capacity * 2
                        : field->data.capacity * 2 - 1;
-      digits_from_float128(ach, field, ndigits, rdigits, value);
+      uint32_t capacity;
+      uint32_t ddigits;
+      int32_t  drdigits;
+      uint64_t attr;
+      FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
+                                                     capacity,
+                                                     ddigits,
+                                                     drdigits,
+                                                     attr);
+      digits_from_int128(ach, field, ndigits, value128, drdigits);
 
       const char *digits = ach;
       for(size_t i=0; i<ndigits; i++)
@@ -16517,13 +16730,31 @@ initial_from_initial(cbl_field_t *field)
         else
           {
           size_t ndigits = field->data.capacity;
-          digits_from_float128(ach, field, ndigits, rdigits, value);
-          /* ???  This resides in libgcobol valconv.cc.  */
+          uint32_t capacity;
+          uint32_t ddigits;
+          int32_t  drdigits;
+          uint64_t attr;
+          FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
+                                                         capacity,
+                                                         ddigits,
+                                                         drdigits,
+                                                         attr);
+          digits_from_int128(ach, field, ndigits, value128, drdigits);
+
+          // __gg__string_to_numeric_edited operates in ASCII space:
           __gg__string_to_numeric_edited( retval,
                                           ach,
                                           field->data.rdigits,
                                           negative,
                                           field->data.picture);
+          // So now we convert it to the target encoding:
+          size_t nbytes;
+          const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                                   field->codeset.encoding,
+                                                   retval,
+                                                   strlen(retval),
+                                                   &nbytes);
+          strcpy(retval, converted);
           }
         }
       break;
@@ -16556,10 +16787,32 @@ initial_from_initial(cbl_field_t *field)
 
     case FldLiteralN:
       {
-////      retval = static_cast<char *>(xmalloc(field->data.capacity+1));
-////      gcc_assert(retval);
-////      memcpy(retval, field->data.initial, field->data.capacity);
-////      retval[field->data.capacity] = '\0';
+      // This requires annotation.
+
+      // The compiler originally used ASCII for field->data.initial. Later we
+      // expanded the field with the addition of the codeset.encoding
+      // For consistency in the parser processing, the FldLiteralN is arriving
+      // with the Object-Computer's character encoding, and field->data.initial
+      // is showing up encoded.
+
+      // But on the run-time side, if the initial string is needed, it is
+      // invariably more useful in ASCII.  Consider converting that string to
+      // a floating-point value, for example.
+
+      // So, we are going to convert the data.initial string back to ASCII
+      // here.  Later on, when we establish the run-time encoding, we will
+      // check for FldLiteralN and set that to ASCII as well. See
+      // actually_create_the_static_field().
+
+      size_t nbytes;
+      const char *converted = __gg__iconverter(field->codeset.encoding,
+                                               DEFAULT_SOURCE_ENCODING,
+                                               field->data.initial,
+                                               strlen(field->data.initial),
+                                               &nbytes);
+      retval = static_cast<char *>(xmalloc(strlen(field->data.initial)+1));
+      gcc_assert(retval);
+      strcpy(retval, converted);
       break;
       }
 
@@ -16716,9 +16969,14 @@ actually_create_the_static_field( cbl_field_t *new_var,
   next_field = TREE_CHAIN(next_field);
 
   //  INT,     "encoding",
+  //  For FldLiteralN we force the encoding to be ASCII.
+  //  See initial_from_initial() for an explanation.
   CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr),
                         next_field,
-                        build_int_cst_type(INT, new_var->codeset.encoding));
+                        build_int_cst_type(INT,
+                                           new_var->type == FldLiteralN ?
+                                                DEFAULT_SOURCE_ENCODING
+                                              : new_var->codeset.encoding));
   next_field = TREE_CHAIN(next_field);
 
   //  INT,     "alphabet",
@@ -17643,6 +17901,10 @@ parser_symbol_add(struct cbl_field_t *new_var )
           length_of_initial_string = new_var->data.capacity+1;
           break;
 
+        case FldLiteralN:
+          length_of_initial_string = strlen(new_initial)+1;
+          break;
+
         default:
           length_of_initial_string = new_var->data.capacity;
           break;
index 6582d2e8898d6ac388233393b269e63568e9cb26..802bba7594468ac18606ea0b4307d67dae30d4b3 100644 (file)
@@ -400,6 +400,12 @@ parser_file_rewrite( cbl_file_t *file, cbl_field_t *field,
 void
 parser_file_delete( cbl_file_t *file, bool sequentially );
 
+void parser_file_delete_file( cbl_label_t *name,
+                              std::vector<cbl_file_t*> filenames );
+void parser_file_delete_on_exception( cbl_label_t *name );
+void parser_file_delete_not_exception( cbl_label_t *name );
+void parser_file_delete_end( cbl_label_t *name );
+
 #if condition_lists
 struct cbl_conditional_t {
   cbl_field_t *tgt;
index 320e6bf43664fa0bc0ca2590e973c5631db69c77..7d6ae8c93d48538d5b262c90949b6473bba659f7 100644 (file)
@@ -42,6 +42,7 @@
 #include "gengen.h"
 #include "structs.h"
 #include "../../libgcobol/gcobolio.h"
+#include "../../libgcobol/charmaps.h"
 #include "show_parse.h"
 
 void
index 56b6b83728b55c3c5e5ed5186a6deaf74dd0ca06..63f37f68806b6f600f4889200f3f65e82c7fb25b 100644 (file)
@@ -1744,7 +1744,7 @@ get_literal_string(cbl_field_t *field)
   char *buffer = static_cast<char *>(xcalloc(1, buffer_length));
 
   size_t charsout;
-  const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
+  const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
                                      field->codeset.encoding,
                                      field->data.initial,
                                      field->data.capacity,
index 9187a59a3cfcd97b70b7e5e12f9145d657ea27fb..d54a686511f5bef5e1eac22a98aea8ef45573d9c 100644 (file)
@@ -51,7 +51,7 @@
     accept_envar_e,
   };
 
-  struct collating_an_t {
+  struct coll_alphanat_t {
     const char *alpha, *national; 
   };
 
@@ -575,7 +575,7 @@ class locale_tgt_t {
                        RD RECORD RECORDING RECORDS RECURSIVE
                        REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS
                        REMOVAL RENAMES  REPLACE REPLACING REPORT REPORTING REPORTS
-                       REPOSITORY RERUN RESERVE RESTRICTED RESUME
+                       REPOSITORY RERUN RESERVE RESTRICTED RESUME RETRY
                        REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN
 
                        SAME SCREEN SD
@@ -702,8 +702,8 @@ class locale_tgt_t {
 %type   <number>        open_io alphabet_etc
 %type   <special_type>  device_name
 %type   <string>        numed  context_word ctx_name locale_spec
-%type   <collating_sequences> collating_sequences collating_an
-%type   <collating_name> collating_an 
+%type   <char_class_locales> char_class_locales coll_alphanat
+%type   <collating_name> coll_alphanat 
 %type   <literal>       namestr alphabet_lit program_as repo_as
 %type   <field>         perform_cond kind_of_name
 %type   <refer>         alloc_ret
@@ -738,6 +738,9 @@ class locale_tgt_t {
                         relative_key_clause reserve_clause sharing_clause
 
 %type   <file>          filename read_body write_body delete_body
+%type   <label>         delete_file_body
+%type   <error>         delete_error delete_except delete_excepts
+                                                
 %type   <file>         start_impl start_cond start_body
 %type   <rewrite_t>     rewrite_body
 %type   <min_max>       record_vary rec_contains from_to record_desc
@@ -833,6 +836,7 @@ class locale_tgt_t {
                         global is_global anycase backward
                         end_display
                         exh_changed exh_named
+                        override
 %type   <number>        mistake globally first_last
 %type   <io_mode>   io_mode
 
@@ -874,6 +878,7 @@ class locale_tgt_t {
 %type  <opt_init_sect>         opt_init_sect
 %type  <number>        opt_init_value
 %type  <number>        locale_current loc_category user_default
+%type   <string>        locale_name
 %type   <token_list>    loc_categories locale_tgt
 %type  <opt_round>     rounded round_between rounded_type rounded_mode
 %type  <opt_arith>     opt_arith_type
@@ -901,7 +906,7 @@ class locale_tgt_t {
     struct { YYLTYPE loc; int token; literal_t name; } prog_end;
     struct { int token; special_name_t id; } special_type;
     struct { char locale_type; const char * name; } locale_phrase;
-             collating_an_t collating_sequences;
+             coll_alphanat_t char_class_locales;
     struct collating_name_t { int token; const char *name; } collating_name;
     struct { size_t isym; cbl_encoding_t encoding; } codeset;
     struct { cbl_field_type_t type;
@@ -2371,6 +2376,23 @@ config_paragraphs: config_paragraph
 config_paragraph:
                 SPECIAL_NAMES '.'
         |       SPECIAL_NAMES '.' special_names '.'
+                {
+                  std::reverse_iterator<symbol_elem_t *>
+                                        p(symbols_end()),
+                                     pend(symbols_begin(PROGRAM));
+                  for( ++p; p != pend; p++ ) {
+                    if( p->type == SymAlphabet ) {
+                      const auto& alphabet = *cbl_alphabet_of(&*p);
+                      if( alphabet.encoding == no_encoding_e ) {
+                        assert(alphabet.locale != 0 );
+                        const auto& missing = *cbl_locale_of(symbol_at(alphabet.locale));
+                        error_msg(alphabet.loc,
+                          "ALPHABET %qs references LOCALE %qs, which is not defined",
+                                  alphabet.name, missing.name);
+                      }
+                    }
+                  }
+                }
         |       SOURCE_COMPUTER  '.' 
         |       SOURCE_COMPUTER  '.' NAME '.'
         |       SOURCE_COMPUTER  '.' NAME with_debug '.'
@@ -2507,19 +2529,36 @@ with_debug:     with DEBUGGING MODE {
                 ;
 
 collations:     %empty
-        |       collation_classification
-        |       collation_sequence
-        |       collation_classification collation_sequence
-        |       collation_sequence collation_classification
+        |       char_classification
+        |       collating_sequence
+        |       char_classification collating_sequence
+        |       collating_sequence char_classification
                 ;
-collation_classification:
-                character CLASSIFICATION collating_sequences[seq]
+char_classification:
+                character CLASSIFICATION char_class_locales[seq]
                 {
-                  warn_msg(@seq, "CHARACTER CLASSIFICATION ignored");
+                  if( $seq.alpha ) {
+                    auto e = symbol_locale(PROGRAM, $seq.alpha);
+                    if( !e ) {
+                      error_msg(@seq, "no LOCALE defined as %qs", $seq.alpha);
+                    } else {
+                      auto& encoding = cbl_locale_of(e)->encoding;
+                      current.alpha_encoding(symbol_index(e), encoding);
+                    }
+                  }
+                  if( $seq.national ) {
+                    auto e = symbol_locale(PROGRAM, $seq.national);
+                    if( !e ) {
+                      error_msg(@seq, "no LOCALE defined as %qs", $seq.national);
+                    } else {
+                      auto& encoding = cbl_locale_of(e)->encoding;
+                      current.national_encoding(symbol_index(e), encoding);
+                    }
+                  }
                 }
                 ;
-collation_sequence:
-                program_kw collating SEQUENCE collating_sequences[seq]
+collating_sequence:
+                program_kw collating SEQUENCE char_class_locales[seq]
                 {
                   if( !current.collating_sequence($seq.alpha) ) {
                     error_msg(@seq, "collating sequence already defined as '%s'",
@@ -2529,20 +2568,20 @@ collation_sequence:
                 }
                 ;
 
-collating_sequences:
+char_class_locales:
                 is NAME[name] {
                   $$.alpha = $name;
                   $$.national = nullptr;
                 }
-        |       collating_ans { $$ = $1; }
+        |       coll_alphanats { $$ = $1; }
                 ;
-collating_ans:  collating_an[encoding] {
-                  $$ = collating_an_t();
+coll_alphanats: coll_alphanat[encoding] {
+                  $$ = coll_alphanat_t();
                   const char **pname =
                     $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
                   *pname = $encoding.name;
                 }
-        |       collating_ans collating_an[encoding]
+        |       coll_alphanats coll_alphanat[encoding]
                 {
                   const char **pname =
                     $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
@@ -2553,7 +2592,7 @@ collating_ans:  collating_an[encoding] {
                   *pname = $encoding.name;
                 }
                 ;
-collating_an:   for alphanational is locale_phrase[locale] {
+coll_alphanat:  for alphanational is locale_phrase[locale] {
                   $$.token = $alphanational;
                   $$.name = $locale.name;
                   if( ! $locale.name ) {
@@ -2568,7 +2607,6 @@ collating_an:   for alphanational is locale_phrase[locale] {
                                       keyword_str($$.token),
                                       locale_name);
                   }
-                  warn_msg(@locale, "LOCALE phrase ignored");
                 }
                 ;             
 
@@ -2643,9 +2681,20 @@ special_name:   dev_mnemonic
                 {
                   symbol_decimal_point_set(',');
                 }
-        |       LOCALE NAME is locale_spec[spec] {
-                  current.locale($NAME, $spec);
-                  cbl_unimplementedw("sorry, unimplemented: LOCALE %qs", $spec);
+        |       LOCALE NAME is locale_spec[spec]
+                {
+                  cbl_locale_t locale($NAME, $spec);
+                  if( locale.encoding == no_encoding_e ) {
+                    error_msg(@NAME, "invalid iconv LOCALE name %qs", $spec);
+                    YYERROR;
+                  }
+                  if( locale.encoding == UTF8_e ) {
+                    cbl_unimplemented("UTF-8");
+                    YYERROR; 
+                  }
+                  if( ! current.locale_add(locale) ) {
+                    error_msg(@NAME, "%qs already defined as LOCALE name", $NAME);
+                  }
                 }
                 ;
         |       upsi
@@ -2655,6 +2704,8 @@ special_name:   dev_mnemonic
                 }
                 ;
 locale_spec:    NAME    { $$ = $1; }
+        |       UTF_8   { static char s[] ="UTF-8";  $$ = s; }
+        |       UTF_16  { static char s[] ="UTF-16"; $$ = s; }
         |       LITERAL { $$ = string_of($1); }
 
                 ;
@@ -2746,14 +2797,16 @@ device_name:    SYSIN           { $$.token = SYSIN; $$.id = SYSIN_e; }
 alphabet_name:  STANDARD_ALPHABET  { $$ = alphabet_add(@1, CP1252_e); }
         |       NATIVE             { $$ = alphabet_add(@1, EBCDIC_e); }
         |       EBCDIC             { $$ = alphabet_add(@1, EBCDIC_e); }
-        |       LOCALE ctx_name
+        |       LOCALE locale_name[name]
                 {
-                  auto e = symbol_alphabet(PROGRAM, $ctx_name);
+                  auto e = symbol_locale(PROGRAM, $name);                  
                   if( !e ) {
-                    error_msg(@ctx_name, "no such ALPHABET %qs", $ctx_name);
-                    YYERROR;
-                  }
-                  $$ = cbl_alphabet_of(e);
+                    dbgmsg("no such LOCALE yet %s", $name);
+                    cbl_locale_t locale($name); // locale is named but not defined 
+                    e = symbol_locale_add(PROGRAM, &locale); 
+                  } 
+                  cbl_alphabet_t alphabet( @name, symbol_index(e), $name);
+                  $$ = alphabet_add(alphabet);
                 }
         |       alphabet_seqs
                 {
@@ -3592,7 +3645,7 @@ const_value:    cce_expr
 
 value78:        literalism
                 {
-                  cbl_field_data_t data = {};
+                  cbl_field_data_t data;
                  data.capacity = capacity_cast(strlen($1.data));
                   data.initial = $1.data;
                   $$.encoding = $1.encoding;
@@ -3600,13 +3653,15 @@ value78:        literalism
                 }
         |       const_value
                 {
-                  cbl_field_data_t data = {};
+                  cbl_field_data_t data;
                  data = build_real (float128_type_node, $1);
+                  $$.encoding = current_encoding('A');
                   $$.data = new cbl_field_data_t(data);
                 }
         |       reserved_value[value]
                 {
                  const auto field = constant_of(constant_index($value));
+                  $$.encoding = current_encoding('A');
                   $$.data = new cbl_field_data_t(field->data);
                 }
 
@@ -3638,6 +3693,7 @@ data_descr1:    level_name
                   field.type = FldLiteralN;
                  field.data = build_real (float128_type_node, $const_value);
                   field.data.initial = string_of($const_value);
+                  field.codeset.set();
 
                   if( !cdf_value(field.name, cdfval_t($const_value)) ) {
                     error_msg(@1, "%s was defined by CDF", field.name);
@@ -3674,13 +3730,12 @@ data_descr1:    level_name
                   if( !cdf_value(field.name, $lit.data) ) {
                     error_msg(@1, "%s was defined by CDF", field.name);
                   }
-                  if( ! field.codeset.valid() ) {
-                    if( ! field.codeset.set(field.codeset.standard_internal.type) ) {
-                      error_msg(@lit, "CONSTANT inconsistent with encoding %s",
-                                cbl_alphabet_t::encoding_str(field.codeset.encoding));
-                    }
+                  if( ! field.codeset.set() ) {
+                    error_msg(@lit, "CONSTANT inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field.codeset.encoding));
                   }
-                  value_encoding_check(@lit, $1, $lit.encoding);
+                  
+                  value_encoding_check(@lit, $1);
                 }
         |       level_name CONSTANT is_global FROM NAME
                 {
@@ -3718,6 +3773,7 @@ data_descr1:    level_name
                   } else {
                     field.type = FldLiteralN;
                     field.data.initial = string_of(field.data.value_of());
+                    field.codeset.set($data.encoding);
                     if( !cdf_value(field.name, field.as_integer()) ) {
                       yywarn("%s was defined by CDF", field.name);
                     }
@@ -3975,6 +4031,15 @@ data_descr1:    level_name
                   // Verify VALUE
                   $field->report_invalid_initial_value(@data_clauses);
 
+                  bool numerical =
+                      $field->type == FldNumericDisplay || is_numeric($field);
+
+                  if( $field->data.initial && ! numerical ) {
+                    if( normal_value_e == cbl_figconst_of($field->data.initial) ) {
+                      value_encoding_check(@data_clauses, $field);
+                    }
+                  }
+
                   // verify REDEFINES
                   const auto parent = parent_of($field);
                   if( parent && $field->level == parent->level ) {
@@ -4287,14 +4352,16 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                   if( field->data.initial != NULL ) {
                     if( 0 < field->data.capacity &&
                             field->data.capacity < uint32_t($size) ) {
-                      auto p = blank_pad_initial( field->data.initial,
-                                                  field->data.capacity, $size );
+                      auto p = blank_pad_initial(field->data.initial,
+                                                 field->data.capacity, $size );
                       if( !p ) YYERROR;
                       field->data.initial = p;
                     }
                   }
 
-                  field->data.capacity = $size;
+                  charmap_t *charmap = 
+                                    __gg__get_charmap(field->codeset.encoding);
+                  field->data.capacity = $size * charmap->stride();
                   field->data.picture = NULL;
 
                   if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s",
@@ -4708,14 +4775,23 @@ usage_clause1:  usage BIT
 
 value_clause:   VALUE all LITERAL[lit] {
                   cbl_field_t *field = current_field();
-                  if( ! field->codeset.set($lit.encoding) ) {
-                    error_msg(@lit, "VALUE inconsistent with encoding %s",
-                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+
+                  if( $lit.prefix[0] ) { // not the default encoding
+                    if( ! field->codeset.set($lit.encoding) ) {
+                      error_msg(@lit, "VALUE inconsistent with encoding %s",
+                                cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                    }
+                  } else {
+                    field->codeset.set();
                   }
+
+                  if( field->codeset.encoding != $lit.encoding ) {
+                    error_msg(@lit, "PICTURE inconsistent with VALUE %s'%s'",
+                              $lit.prefix, $lit.data);
+                  }
+
                   field->data.initial  = $lit.data;
                   field->attr |= literal_attr($lit.prefix);
-                  // The __gg__initialize_data routine needs to know that VALUE is a
-                  // quoted literal. This is critical for NumericEdited variables
                   field->attr |= quoted_e;
 
                   if( field->data.capacity == 0 ) {
@@ -4732,7 +4808,6 @@ value_clause:   VALUE all LITERAL[lit] {
                       }
                     }
                   }
-                  value_encoding_check(@lit, field, $lit.encoding);
                 }
         |       VALUE all cce_expr[value] {
                   cbl_field_t *field = current_field();
@@ -4761,11 +4836,9 @@ value_clause:   VALUE all LITERAL[lit] {
         |       VALUE all reserved_value[value]
                 {
                   cbl_field_t *field = current_field();
-                  if( ! field->codeset.valid() ) {
-                    if( ! field->codeset.set(field->codeset.standard_internal.type) ) {
-                      error_msg(@value, "VALUE inconsistent with encoding %s",
-                                cbl_alphabet_t::encoding_str(field->codeset.encoding));
-                    }
+                  if( ! field->codeset.set() ) {
+                    error_msg(@value, "VALUE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
                   }
                   if( $value != NULLS ) {
                     auto fig = constant_of(constant_index($value));
@@ -5017,6 +5090,7 @@ typedef_clause: is TYPEDEF strong
                     error_msg(@2, "%s %s IS TYPEDEF must be level 01",
                            field->level_str(), field->name);
                   }
+                  field->codeset.set();
                   field->attr |= typedef_e;
                   if( $strong ) field->attr |= strongdef_e;
                   if( ! current.typedef_add(field) ) {
@@ -7007,6 +7081,8 @@ context_word:   APPLY                   { static char s[] ="APPLY";
                                          $$ = s; } // LOCK MODE clause
         |       MULTIPLE               { static char s[] ="MULTIPLE";
                                          $$ = s; } // LOCK ON phrase
+        |       NAT                    { static char s[] ="NAT";
+                                         $$ = s; } // CONVERT function
         |       NEAREST_AWAY_FROM_ZERO { static char s[] ="NEAREST-AWAY-FROM-ZERO";
                                          $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase
         |       NEAREST_EVEN           { static char s[] ="NEAREST-EVEN";
@@ -8544,7 +8620,7 @@ advance_by:     scalar lines { $$ = $1; } /* BUG: should accept reference */
                    * number of lines is negative.  So, we use the
                    * negative Number Of The Beast as a PAGE flag.
                    */
-                  $$ = new_reference( new_literal("-666") );
+                  $$ = new_reference( new_literal(xstrdup("-666")) );
                 }
         |       device_name { $$ = new_reference(literally_one); }
                 ;
@@ -8601,7 +8677,33 @@ io_invalid:     INVALID key {
 
 delete:         delete_impl  end_delete
         |       delete_cond  end_delete
+        |       delete_file  end_delete
                 ;
+delete_file:    DELETE delete_file_body[stmt] delete_error[err] {
+                  if( ! $err.on_error )  parser_file_delete_on_exception($stmt);
+                  if( ! $err.not_error ) parser_file_delete_not_exception($stmt);
+                  parser_file_delete_end($stmt);
+                  current.declaratives_evaluate();
+                }
+delete_file_body:
+                FILE_KW override filenames retry_phrase {
+                  $$ = label_add(@$, LblXml, uniq_label("xfile"));
+                  xml_statements.push($$);
+                  statement_begin(@$, DELETE);
+                  std::vector<cbl_file_t*>
+                      filenames($filenames->files.begin(),
+                                $filenames->files.end() );
+                  parser_file_delete_file( $$, filenames);
+                }
+                ;
+retry_phrase:   %empty
+        |       RETRY expr TIMES
+        |       FOR expr  SECONDS
+        |       FOREVER {
+                  cbl_unimplemented("DELETE FILE RETRY");
+                }
+                ;
+
 delete_impl:    DELETE delete_body[file]
                 {
                   file_delete_args.call_parser_file_delete(true);
@@ -8634,6 +8736,63 @@ delete_body:    filename[file] record
                   $$ = $file;
                 }
                 ;
+
+delete_error:   %empty %prec DELETE {
+                  $$.on_error = $$.not_error = nullptr;
+                }
+        |       delete_excepts %prec DELETE
+                ;
+delete_excepts:     delete_except[a] statements %prec DELETE
+                {
+                  assert( $a.on_error || $a.not_error );
+                  assert( ! ($a.on_error && $a.not_error) );
+                  $$ = $a;
+                }
+        |       delete_excepts[a] delete_except[b] statements %prec DELETE
+                {
+                  if( $a.on_error && $a.not_error ) {
+                    error_msg(@1, "too many ON ERROR clauses");
+                    YYERROR;
+                  }
+                  // "ON" and "NOT ON" could be reversed, but not duplicated.
+                  if( $a.on_error && $b.on_error ) {
+                    error_msg(@1, "duplicate ON ERROR clauses");
+                    YYERROR;
+                  }
+                  if( $a.not_error && $b.not_error ) {
+                    error_msg(@1, "duplicate NOT ON ERROR clauses");
+                    YYERROR;
+                  }
+                  $$ = $a;
+                  if( $$.on_error ) {
+                    assert($b.not_error);
+                    $$.not_error = $b.not_error;
+                  } else {
+                assert($b.on_error);
+                    $$.on_error = $b.on_error;
+                  }
+                }
+                ;
+delete_except:  EXCEPTION
+                {
+                  auto xml_stmt = xml_statements.top();
+                  // The value of the pointer no longer matters, only NULL or not. 
+                  $$.on_error = $$.not_error = nullptr;
+                  switch($1) {
+                  case EXCEPTION:
+                    $$.on_error = xml_stmt;
+                    parser_file_delete_on_exception(xml_stmt);
+                    break;
+                  case NOT:
+                    $$.not_error = xml_stmt;
+                    parser_file_delete_not_exception(xml_stmt);
+                    break;
+                  default:
+                    gcc_unreachable();
+                  }
+                }
+                ;
+
 end_delete:     %empty %prec DELETE
         |       END_DELETE
                 ;
@@ -10536,7 +10695,9 @@ function_udf:   FUNCTION_UDF '(' arg_list[args] ')' {
                                    cbl_ffi_arg_t actual(param.crv, ar);
                                    return actual;
                                  } );
-                 auto name = new_literal(strlen(L->name), L->name, quoted_e);
+                  // Pretend hex-encoded because that means use verbatim.
+                  auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
+                  auto name = new_literal(strlen(L->name), L->name, attr);
                  ast_call( @1, name, $$, args.size(), args.data(), NULL, NULL, true );
                 }
         |       FUNCTION_UDF_0 {
@@ -10547,8 +10708,11 @@ function_udf:   FUNCTION_UDF '(' arg_list[args] ')' {
                  const auto returning = cbl_field_of(symbol_at(L->returning));
                   $$ = new_temporary_clone(returning);
                  $$->data.initial = returning->name; // user's name for the field
-
-                  auto name = new_literal(strlen(L->name), L->name, quoted_e);
+                  cbl_field_attr_t call_attr
+                                 = (cbl_field_attr_t)(quoted_e|hex_encoded_e);
+                  cbl_field_t *name = new_literal(strlen(L->name),
+                                                  L->name,
+                                                  call_attr);
                   ast_call( @1, name, $$, narg, args, NULL, NULL, true );
                 }
                 ;
@@ -11135,6 +11299,18 @@ subst_input:    anycase first_last varg[v1] varg[v2] {
                 }
                 ;
 
+locale_name:    NAME
+                {
+                  auto e = symbol_locale(PROGRAM, $NAME);
+                  if( !e ) {
+                    error_msg(@NAME, "no such SPECIAL-NAMES LOCALE: %qs", $NAME);
+                    YYERROR;
+                  }
+                  $$ = const_cast<char*>(
+                           __gg__encoding_iconv_name(cbl_locale_of(e)->encoding) );
+                }
+                ;
+
 intrinsic_locale:
                 LOCALE_COMPARE '(' varg[r1] varg[r2]  ')'
                 {
@@ -11143,11 +11319,12 @@ intrinsic_locale:
                   cbl_refer_t dummy = {};
                   if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &dummy) ) YYERROR;
                 }
-        |       LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')'
+        |       LOCALE_COMPARE '(' varg[r1] varg[r2] locale_name ')'
                 {
                   location_set(@1);
                   $$ = new_alphanumeric();
-                  if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, $r3) ) YYERROR;
+                  cbl_refer_t locale(new_literal($locale_name));
+                  if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &locale) ) YYERROR;
                 }
 
         |       LOCALE_DATE '(' varg[r1]  ')'
@@ -11453,6 +11630,10 @@ optional:       %empty   { $$ = false; }
         |       OPTIONAL { $$ = true;  }
                 ;
 
+override:      %empty   { $$ = false; }
+       |       OVERRIDE { $$ = true; }
+               ;
+
 program_kw:     %empty
         |       PROGRAM_kw
                 ;
@@ -11900,6 +12081,7 @@ void ast_call( const YYLTYPE& loc, cbl_refer_t name, const cbl_refer_t& returnin
   if( is_literal(name.field) ) {
     cbl_field_t called = { FldLiteralA, quoted_e | constant_e,
                            name.field->data, 77 };
+    called.attr |= name.field->attr;
     snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial);
     name.field = cbl_field_of(symbol_field_add(PROGRAM, &called));
     symbol_field_location(field_index(name.field), loc);
@@ -13030,13 +13212,13 @@ struct expand_group : public std::list<cbl_refer_t> {
 };
 
 
-static const char * initial_default_value;
-       const char * wsclear() { return initial_default_value; }
+static const uint32_t * initial_default_value;
+       const uint32_t * wsclear() { return initial_default_value; }
 
 void
-wsclear( char ch ) {
-  static char byte = ch;
-  initial_default_value = &byte;
+wsclear( uint32_t i ) {
+  static uint32_t init_val = i;
+  initial_default_value = &init_val;
   current.program_needs_initial();
 }
 
@@ -13558,16 +13740,16 @@ literal_t::set( const cbl_field_t * field ) {
 
 literal_t&
 literal_t::set_prefix( const char *input, size_t len ) {
-  encoding = current_encoding('A');
+  encoding = current_encoding(display_encoding_e);
   assert(len < sizeof(prefix));
   std::fill(prefix, prefix + sizeof(prefix), '\0');
   std::transform(input, input + len, prefix, toupper);
   switch(prefix[0]) {
   case '\0': case 'Z':
-    encoding = current_encoding('A');
+    encoding = current_encoding(display_encoding_e);
     break;
   case 'N':
-    encoding = current_encoding('N');
+    encoding = current_encoding(national_encoding_e);
     if( 'X' == prefix[1] ) {
       cbl_unimplemented("NX literals");
     }
@@ -13583,7 +13765,7 @@ literal_t::set_prefix( const char *input, size_t len ) {
   default:
     gcc_unreachable();
   }
-  assert(encoding <= iconv_YU_e);
+  assert(valid_encoding(encoding));
   return *this;
 }
 
@@ -13608,8 +13790,8 @@ literal_attr( const char prefix[] ) {
     case 'X':
       switch(prefix[0]) {
       case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e);
-      case 'N': 
-      case 'U': cbl_unimplemented("National"); return none_e;
+      case 'N': cbl_unimplemented("Hexadecimal National"); return none_e;
+      case 'U': cbl_unimplemented("Hexadecimal Unicode");  return none_e;
       }
       break;
     }
index 1fbc8f51774612e5670a0fa3619d480d68537d16..99c9cefae46578c383f9306ab608a0bbef28ff70 100644 (file)
@@ -273,38 +273,11 @@ static inline char * dequote( char input[] ) {
 static const char *
 name_of( cbl_field_t *field ) {
   assert(field);
-  // Because this can be called after .initial has been converted to the
-  // field->codeset.encoding, we have to undo that.  There may be some danger
-  // associated with returning a static.  I don't actually know. -- RJD.
-  static size_t static_length = 0;
-  static char * static_buffer = nullptr;
-  
-  if( field->data.initial == nullptr ) return field->name;
-
-  if( field->name[0] == '_' )
-    {
-    // Make a copy of .initial
-    if( static_length < field->data.capacity+1 )
-      {
-      static_length = field->data.capacity+1;
-      static_buffer = static_cast<char *>(xrealloc(static_buffer,
-                                                   static_length));
-      memcpy(static_buffer, field->data.initial, field->data.capacity);
-      static_buffer[field->data.capacity] = '\0';
-      }
-    // Convert it from ->encoding to DEFAULT_CHARMAP_SOURCE
-    size_t charsout;
-    char *converted =  __gg__iconverter(field->codeset.encoding,
-                                        DEFAULT_CHARMAP_SOURCE,
-                                        field->data.initial,
-                                        field->data.capacity,
-                                        &charsout );
-    memcpy(static_buffer, converted, charsout);
-    static_buffer[charsout] = '\0';
-    }
-
+  if( field->data.initial == nullptr ) {
+    return field->name;
+  }
   return field->name[0] == '_' && field->data.initial?
-    static_buffer : field->name;
+    field->data.original() : field->name;
 }
 
 static const char *
@@ -1337,6 +1310,7 @@ std::map<std::string, std::list<std::string>>
 
 class prog_descr_t {
   std::set<std::string> call_targets, subprograms;
+  std::set<cbl_locale_t> locales;
 public:
   std::set<function_descr_t> function_repository;
   size_t program_index;
@@ -1361,17 +1335,14 @@ public:
     } alpha, national;
     encoding_t() : national(EBCDIC_e) {}
   } alphabet;
-  struct locale_t {
-    cbl_name_t name; const char *os_name;
-    locale_t() : name(""), os_name(nullptr) {}
-    locale_t(const cbl_name_t name, const char *os_name)
-      : name(""), os_name(os_name) {
-      if( name ) {
-        bool ok = namcpy(YYLTYPE(), this->name, name);
-        gcc_assert(ok);
-      }
-    }
-  } locale;
+  
+  bool locale_add( const cbl_locale_t& locale ) {
+    auto e = symbol_locale_add(program_index, &locale);
+    assert(e);
+    auto p = locales.insert(locale);
+    return p.second;
+  }
+      
   cbl_options_t options;
 
   explicit prog_descr_t( size_t isymbol )
@@ -1904,7 +1875,14 @@ static class current_t {
     return program.alphabet.alpha.encoding;
   }
   cbl_encoding_t  national_encoding() const {
-    if( programs.empty() ) return EBCDIC_e;
+    cbl_encoding_t when_empty = EBCDIC_e;
+    char *alternate = getenv("NATIONAL");
+    if( alternate )
+      {
+      when_empty = __gg__encoding_iconv_type(alternate);
+      gcc_assert(when_empty);
+      }
+    if( programs.empty() ) return when_empty;
     const prog_descr_t& program = programs.top();
     return program.alphabet.national.encoding;
   }
@@ -1929,23 +1907,8 @@ static class current_t {
     return programs.top().options.default_round = mode;
   }
 
-  const char *
-  locale() {
-    return programs.empty()? NULL : programs.top().locale.os_name;
-  }
-  const char *
-  locale( const cbl_name_t name ) {
-    if( programs.empty() ) return NULL;
-    const prog_descr_t::locale_t& locale = programs.top().locale;
-    return 0 == strcmp(name, locale.name)? locale.name : NULL;
-  }
-  const prog_descr_t::locale_t&
-  locale( const cbl_name_t name, const char os_name[] ) {
-    if( programs.empty() ) {
-      static prog_descr_t::locale_t empty;
-      return empty;
-    }
-    return programs.top().locale = prog_descr_t::locale_t(name, os_name);
+  bool locale_add( const cbl_locale_t& locale ) {
+    return programs.top().locale_add(locale);
   }
 
   bool new_program ( const YYLTYPE& loc, cbl_label_type_t type,
@@ -2296,11 +2259,13 @@ add_debugging_declarative( const cbl_label_t * label ) {
   }
 }
 
-cbl_options_t current_options() {
+cbl_options_t
+current_options() {
   return current.options_paragraph;
 }
 
-cbl_encoding_t current_encoding( char a_or_n ) {
+cbl_encoding_t
+current_encoding( char a_or_n ) {
   cbl_encoding_t retval;
   switch(a_or_n) {
   case 'A':
@@ -2316,14 +2281,17 @@ cbl_encoding_t current_encoding( char a_or_n ) {
   return retval;
 }
 
-size_t current_program_index() {
+size_t
+current_program_index() {
   return current.program()? current.program_index() : 0;
 }
 
-cbl_label_t * current_section() {
+cbl_label_t *
+current_section() {
   return current.section();
 }
-cbl_label_t * current_paragraph() {
+cbl_label_t *
+current_paragraph() {
   return current.paragraph();
 }
 
@@ -2402,8 +2370,13 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r );
 static bool
 is_integer_literal( const cbl_field_t *field ) {
   if( field->type == FldLiteralN ) {
-    const char *initial = field->data.initial;
-
+    size_t nchar;
+    const char *initial = __gg__iconverter(field->codeset.encoding,
+                                           DEFAULT_SOURCE_ENCODING,
+                                           field->data.initial,
+                                           strlen(field->data.initial),
+                                           &nchar);
+    assert(strlen(initial) == nchar);
     switch( *initial ) {
     case '-': case '+': ++initial;
     }
@@ -2982,16 +2955,28 @@ blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) {
   return p;
 }
 
+/*
+ * When cbl_field_t::internalize is called, its data.initial value has been
+ * set, but nothing has been done to it.  It is encoded according to the source
+ * code.  internalize() converts data.initial to the field's encoding. 
+ *
+ * If syntax used was was PIC VALUE, in that order, then PIC set the field's
+ * encoding, and the VALUE clause can verify that its encoding matches.  If the
+ * order was VALUE PIC, the value leaves the encoding uninitialized unless the
+ * value string bore an encoding prefix.  When PIC is processed, codeset_t::set
+ * allows it to set the encoding only if it's either uninitialized, or the PIC
+ * encoding matches the existing one set by VALUE.  In no event does one
+ * override the other; they must agree.
+ *
+ * internalize() fails if data.initial cannot be converted to the field's
+ * encoding.
+ */
 static void
-value_encoding_check( const YYLTYPE& loc, cbl_field_t *field, cbl_encoding_t encoding ) {
+value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) {
   if( ! field->internalize() ) {
     error_msg(loc, "inconsistent string literal encoding for '%s'",
               field->data.initial);
   }
-  if( encoding != field->codeset.encoding ) {
-    warn_msg(loc, "VALUE encoded as %qs for data item encoded as %qs",
-             __gg__encoding_iconv_name(encoding), field->codeset.name());
-  }
 }
 
 #pragma GCC diagnostic push
@@ -3046,12 +3031,16 @@ file_add( YYLTYPE loc, cbl_file_t *file ) {
 
 
 static cbl_alphabet_t *
-alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) {
-  cbl_alphabet_t alphabet(loc, encoding);
+alphabet_add( const cbl_alphabet_t& alphabet ) {
   symbol_elem_t *e = symbol_alphabet_add(PROGRAM, &alphabet);
   assert(e);
   return cbl_alphabet_of(e);
 }
+static cbl_alphabet_t *
+alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) {
+  cbl_alphabet_t alphabet(loc, encoding);
+  return alphabet_add(alphabet);
+}
 
 // The current field always exists in the symbol table, even if it's incomplete.
 static cbl_field_t *
@@ -3302,8 +3291,9 @@ data_division_ready() {
   static size_t nsymbol = 0;
   if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) {
     if( ! literally_one ) {
-      literally_one = new_literal("1");
-      literally_zero = new_literal("0");
+      // Use strdup so cbl_field_t::internalize can free them if need be.
+      literally_one = new_literal(xstrdup("1"));
+      literally_zero = new_literal(xstrdup("0"));
     }
   }
 
index 07aa76dbbf268e10c8a182b3ad92a57bb259a2ae..643d099f3899332936becfca7ffb5d8032bfec64 100644 (file)
@@ -1801,8 +1801,8 @@ B-SHIFT-RC
                                  if( elem->type == SymField ) {
                                    auto f = cbl_field_of(elem);
                                    if( f->type == FldLiteralA && f->has_attr(constant_e) ) {
-                                     type = date_time_fmt(f->data.initial);
-                                     yylval.string = xstrdup(f->data.initial);
+                                     type = date_time_fmt(f->data.original());
+                                     yylval.string = xstrdup(f->data.original());
                                    }
                                  } else {
                                    yylval.string = xstrdup(yytext);
index e1a8cb21b6f7647f911178cc933c546cea0665a0..7945e904573dd26c28490e49281c3185d04ca8fb 100644 (file)
@@ -140,7 +140,13 @@ extern bool cursor_at_sol;
                 fprintf(stderr, "%s", (b).field->name); \
                 if( (b).field->type == FldLiteralA || (b).field->type == FldLiteralN ) \
                     { \
-                    fprintf(stderr, " \"%s\"", (b).field->data.initial); \
+                    size_t nbytes; \
+                    const char *literal = __gg__iconverter((b).field->codeset.encoding, \
+                                                           DEFAULT_SOURCE_ENCODING, \
+                                                           (b).field->data.initial, \
+                                                           strlen((b).field->data.initial), \
+                                                           &nbytes); \
+                    fprintf(stderr, " \"%s\"", literal); \
                     } \
                 else \
                     { \
index 2a299ceee3c3e8975924937560131afb03d05927..07dc0e65f14ba1370761d2b99f2fbc8ead8e54fe 100644 (file)
@@ -293,7 +293,7 @@ elementize( const cbl_field_t& field ) {
   // Dubner did the following because he didn't feel like creating yet another
   // cbl_field_t constructor that included the hardcoded encoding for the
   // global special registers.
-  sym.elem.field.codeset.encoding = iconv_CP1252_e;
+  sym.elem.field.codeset.set();
   return sym;
 }
 
@@ -511,6 +511,9 @@ symbol_elem_cmp( const void *K, const void *E )
   case SymSpecial:
     return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1;
     break;
+  case SymLocale:
+    return strcasecmp(k->elem.locale.name, e->elem.locale.name);
+    break;
   case SymAlphabet:
     return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name);
     break;
@@ -676,6 +679,22 @@ symbol_special( size_t program, const char name[] )
   return symbol_at(p->second);
 }
 
+struct symbol_elem_t *
+symbol_locale( size_t program, const char name[] )
+{
+  cbl_locale_t locale(name);
+  assert(strlen(name) < sizeof locale.name);
+  strcpy(locale.name, name);
+
+  struct symbol_elem_t key(SymLocale, program), *e;
+  key.elem.locale = locale;
+
+  e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems,
+                                                 &symbols.nelem, sizeof(key),
+                                                 symbol_elem_cmp ) );
+  return e;
+}
+
 struct symbol_elem_t *
 symbol_alphabet( size_t program, const char name[] )
 {
@@ -1510,11 +1529,11 @@ field_str( const cbl_field_t *field ) {
         {
         // Apparently we need to trace back the meaning of data.literal for
         // field::type == FldNumericDisplay
-        enc_from = DEFAULT_CHARMAP_SOURCE;
+        enc_from = DEFAULT_SOURCE_ENCODING;
         }
 
       init = __gg__iconverter(enc_from,
-                              DEFAULT_CHARMAP_SOURCE,
+                              DEFAULT_SOURCE_ENCODING,
                               false_data,
                               field->data.capacity,
                               &charsout);
@@ -1522,12 +1541,12 @@ field_str( const cbl_field_t *field ) {
       auto eoinit = init + strlen(init);
       char *s = xasprintf("'%s'", init);
 
-      // No NUL within the initial data. 
+      // No NUL within the initial data.
       auto ok = std::none_of( init, eoinit,
                               []( char ch ) { return ch == '\0'; } );
       assert(ok);
 
-      // If any of the init are unprintable, provide a hex version. 
+      // If any of the init are unprintable, provide a hex version.
       if( ! std::all_of(init, eoinit, fisprint) ) {
         if( is_elementary(field->type) && field->type != FldPointer ) {
           const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
@@ -1663,7 +1682,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
 ////  // Define alphabets for codegen.
 ////  const cbl_alphabet_t *alphabet = nullptr;
 ////  bool supported = true;
-////  
+////
 ////  std::for_each( symbols_begin(program), symbols_end(),
 ////                 [&alphabet, &supported]( const auto& sym ) {
 ////                   if( sym.type == SymAlphabet ) {
@@ -1679,7 +1698,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
 ////    cbl_unimplemented("alphabet %qs (as %qs)", alphabet->name, encoding);
 ////    return false;
 ////  }
-////  
+////
 ////  // Set collation sequence before parser_symbol_add.`
 ////  if( name ) {
 ////    symbol_elem_t *e = symbol_alphabet(program, name);
@@ -1906,38 +1925,46 @@ symbols_update( size_t first, bool parsed_ok ) {
       }
     }
 
-    if( ! field->codeset.valid() ) {
-      switch(field->type) {
-      case FldForward:
-      case FldInvalid:
-        gcc_unreachable();
-      case FldAlphaEdited:
-      case FldAlphanumeric:
-      case FldClass:
-      case FldDisplay:
-      case FldGroup:
-      case FldLiteralA:
-      case FldNumericDisplay:
-      case FldNumericEdited:
+    if( ! field->codeset.consistent() ) {
+      if( ! field->codeset.valid() ) {
+        switch(field->type) {
+        case FldForward:
+        case FldInvalid:
+          gcc_unreachable();
+        case FldAlphaEdited:
+        case FldAlphanumeric:
+        case FldClass:
+        case FldDisplay:
+        case FldGroup:
+        case FldLiteralA:
+        case FldLiteralN:
+        case FldNumericDisplay:
+        case FldNumericEdited:
+          if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
+            error_msg(symbol_field_location(field_index(field)),
+                      "internal: %qs encoding not defined", field->name);
+          }
+          break;
+        case FldConditional:
+        case FldFloat:
+        case FldIndex:
+        case FldNumericBin5:
+        case FldNumericBinary:
+        case FldPacked:
+        case FldPointer:
+        case FldSwitch:
+          break;
+        }
+      } else {
         if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
           error_msg(symbol_field_location(field_index(field)),
-                   "internal: %qs encoding not defined", field->name);
+                    "internal: %qs encoding %qs inconsistent",
+                    field->name,
+                    cbl_alphabet_t::encoding_str(field->codeset.encoding) );
         }
-        break;
-      case FldConditional:
-      case FldFloat:
-      case FldIndex:
-      case FldLiteralN:
-      case FldNumericBin5:
-      case FldNumericBinary:
-      case FldPacked:
-      case FldPointer:
-      case FldSwitch:
-        break;
       }
     }
 
-
     assert( ! field->is_typedef() );
 
     if( parsed_ok ) parser_symbol_add(field);
@@ -2541,6 +2568,13 @@ symbol_file_add( size_t program, cbl_file_t *file ) {
   return e;
 }
 
+symbol_elem_t *
+symbol_locale_add( size_t program, const cbl_locale_t *locale ) {
+  symbol_elem_t sym{ SymLocale, program };
+  sym.elem.locale = *locale;
+  return symbol_add(&sym);
+}
+
 symbol_elem_t *
 symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ) {
   symbol_elem_t sym{ SymAlphabet, program };
@@ -3202,19 +3236,56 @@ constant_of( size_t isym )
   return field;
 }
 
+cbl_locale_t::cbl_locale_t( const cbl_name_t name, const char iconv_name[] ) {
+  gcc_assert(strlen(name) < sizeof this->name);
+  strcpy(this->name, name);
+
+  if( iconv_name ) {
+    encoding = __gg__encoding_iconv_type(iconv_name);
+
+    strcpy(collation, "C");
+    // If the iconv_name is prefixed by langauge_COUNTRY (e.g. en_US), capture that.
+    auto pend = iconv_name + strlen(iconv_name);
+    auto p = std::find(iconv_name, pend, '.');
+    if( p < pend ) {
+      auto pend2 = std::copy(iconv_name, p, collation);
+      std::fill(pend2, collation + sizeof(collation), '\0');
+      iconv_name = ++p;
+    }
+    encoding = __gg__encoding_iconv_type(iconv_name);
+  }
+}
+
+cbl_alphabet_t::cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name )
+  : loc(loc)
+  , locale(locale)
+  , low_index(0)
+  , high_index(255)
+  , last_index(0)
+{
+  if( locale >  0 ) {
+    encoding = cbl_locale_of(symbol_at(locale))->encoding;
+  }
+  memset(collation_sequence, 0xFF, sizeof(collation_sequence));
+  if( name ) { // from Special-Names collation_sequence
+    assert(strlen(name) < sizeof(cbl_name_t));
+    strcpy(this->name, name);
+  }
+}
+
 /*
  * As parsed, the alphabet reflects the encoding of the source code.  If the
  * program uses a different encoding for alphanumeric, convert the alphabet to
- * that. 
- * 
+ * that.
+ *
  * Because a custom alphabet is rare and occurs at most only once per program,
  * we don't attempt to avoid re-encoding.  "Conversion" of ASCII to ASCII is at
- * most 256 calls to iconv(3).  
+ * most 256 calls to iconv(3).
  */
 void
 cbl_alphabet_t::reencode()  {
 
-  const unsigned char * const pend = alphabet + sizeof(alphabet);
+  const unsigned char * const pend = collation_sequence + sizeof(collation_sequence);
   std::vector<char> tgt(256, (char)0xFF);
 
   /*  Keep copies of low_index and last_index for use in run-time as LOW-VALUE
@@ -3230,13 +3301,14 @@ cbl_alphabet_t::reencode()  {
    * a custom alphabet are from NIST, which of course are ASCII.
    */
   const char *fromcode = __gg__encoding_iconv_name(CP1252_e);
-  const char *tocode = __gg__encoding_iconv_name(current_encoding('A'));
+  const char *tocode =
+              __gg__encoding_iconv_name(current_encoding(display_encoding_e));
   iconv_t cd = iconv_open(tocode, fromcode);
-  
+
 #if optimal_reencode
   if( fromcode == tocode ) { // semantically
     tgt.resize(0);
-    return tgt; // Return empty vector; caller copies zero bytes.  
+    return tgt; // Return empty vector; caller copies zero bytes.
   }
 #endif
 
@@ -3247,14 +3319,14 @@ cbl_alphabet_t::reencode()  {
    * that letter in the alphanumeric encoding, and set its collation position
    * in that alphabet.
    */
-  for( const unsigned char *p = alphabet; p < pend; p++ ) {
+  for( const unsigned char *p = collation_sequence; p < pend; p++ ) {
     if( *p == 0xFF ) continue;
-    unsigned char ch = p - alphabet;
+    unsigned char ch = p - collation_sequence;
     unsigned char pos[8] = {};
     size_t  inbytesleft = 1, outbytesleft = sizeof(pos);
     char *inbuf = reinterpret_cast<char*>(&ch),
         *outbuf = reinterpret_cast<char*>(pos);
-    
+
     size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
 
     if( n == size_t(-1) ) {
@@ -3273,7 +3345,7 @@ cbl_alphabet_t::reencode()  {
                 fromcode, ch, ch, n, tocode);
       continue;
     }
-    
+
     if( ch == low_index ) {
       low_index = pos[0];
     }
@@ -3283,21 +3355,21 @@ cbl_alphabet_t::reencode()  {
     if( ch == high_index ) {
      high_index = pos[0];
     }
-    
+
     tgt.at(pos[0]) = *p;
   }
-  
-  std::copy(tgt.begin(), tgt.end(), alphabet); 
+
+  std::copy(tgt.begin(), tgt.end(), collation_sequence);
 }
 
 bool
 cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) {
-  if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) {
-    alphabet[ch] = high_value;
+  if( collation_sequence[ch] == 0xFF || collation_sequence[ch] == high_value) {
+    collation_sequence[ch] = high_value;
     last_index = ch;
     return true;
   }
-  auto taken = alphabet[ch];
+  auto taken = collation_sequence[ch];
   error_msg(loc, "ALPHABET %s, character %<%c%> (X%'%x%') "
            "in position %d already defined at position %d",
            name,
@@ -3310,7 +3382,7 @@ cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high
 void
 cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
   if( ch < 256 ) {
-    alphabet[ch] = alphabet[last_index];
+    collation_sequence[ch] = collation_sequence[last_index];
     if( ch == high_index ) high_index--;
     return;
   } // else it's a figurative constant ...
@@ -3323,20 +3395,20 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
 
   // last_index is already set; use it as the "last value before ALSO"
   if( attr & low_value_e ) {
-    alphabet[0] = alphabet[last_index];
+    collation_sequence[0] = collation_sequence[last_index];
     return;
   }
   if( attr & high_value_e ) {
-    alphabet[high_index--] = alphabet[last_index];
+    collation_sequence[high_index--] = collation_sequence[last_index];
     return;
   }
   if( attr & (space_value_e|quote_value_e) ) {
     ch = field->data.initial[0];
-    alphabet[ch] = alphabet[last_index];
+    collation_sequence[ch] = collation_sequence[last_index];
     return;
   }
   if( attr & (zero_value_e) ) {
-    alphabet[0] = alphabet[last_index];
+    collation_sequence[0] = collation_sequence[last_index];
     error_msg(loc, "ALSO value '%s' is unknown", field->name);
     return;
   }
@@ -3448,18 +3520,33 @@ new_literal_add( const char initial[], uint32_t len,
     }
   else
     {
-    static char empty[2] = "\0";
     field = new_temporary_impl(FldLiteralA);
     field->attr |= attr;
-    field->data.initial = len > 0? initial : empty;
+
+    if(len == 0)
+      {
+      // This will cover UTF-32, should that arise.
+      size_t nbytes = 4;
+      char *init = static_cast<char *>(xmalloc(nbytes));
+      memset(init, 0, nbytes);
+      field->data.initial = init;
+      }
+    if(len)
+      {
+      char *init = static_cast<char *>(xmalloc(len+4));
+      memcpy(init, initial, len);
+      memset(init+len, 0, 4);
+      field->data.initial = init;
+      }
     field->data.capacity = len;
     }
 
   if( ! field->has_attr(hex_encoded_e) ) {
-    field->codeset.set(encoding);
-    if( ! field->internalize() ) {
-      ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial);
+    // If the literal bore a prefix, set the encoding,
+    if( encoding != cbl_field_t::codeset_t::source_encoding->type ) {
+      field->codeset.set(encoding);
     }
+    field->internalize();
   }
 
   static size_t literal_count = 1;
@@ -3595,6 +3682,14 @@ new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
 
 extern os_locale_t os_locale;
 
+const encodings_t cbl_field_t::codeset_t::source_encodings[2] = {
+  { false, iconv_UTF_8_e, "UTF-8" },
+  { true,  iconv_CP1252_e, "CP1252" },
+};
+const encodings_t * cbl_field_t::codeset_t::source_encoding = {
+  cbl_field_t::codeset_t::source_encodings
+};
+
 const encodings_t cbl_field_t::codeset_t::standard_internal = {
   true, iconv_CP1252_e, "CP1252"
 };
@@ -3603,7 +3698,7 @@ const encodings_t cbl_field_t::codeset_t::standard_internal = {
 cbl_field_t *
 new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) {
   const bool force_unsigned = type == FldNumericBin5 && ! is_signed;
-  
+
   if( ! initial && ! force_unsigned ) {
     assert( ! is_literal(type) ); // Literal type must have literal value.
     return temporaries.acquire(type, initial);
@@ -3719,29 +3814,26 @@ cbl_field_t::is_ascii() const {
  * never reverts.
  */
 
-static const char *
-guess_encoding() {
-  static const char *fromcode;
-
-  if( ! fromcode ) {
-    return fromcode = os_locale.assumed;
-  }
-
-  if( fromcode == os_locale.assumed ) {
-    fromcode = os_locale.codeset;
-    if( 0 != strcmp(fromcode, "C") ) { // anything but that
-      return fromcode;
-    }
-  }
-
-  return standard_internal.name;
-}
-
 const char *
 cbl_field_t::internalize() {
-  static const char *fromcode = guess_encoding();
+  /*  The purpose of this routine is to return a nul-terminated string which
+      is data.initial converted from the source-code characters to the
+      codeset.encoding characters.
+      
+      The contract between this routine and the routines that call it is that
+      for alphanumeric types, data.initial shall have the same number of
+      characters as will be needed to fill data.capacity.
+
+      Be aware that for PIC X(32) Z"foo", there are the characters "foo",
+      followed by a NUL, and then 28 spaces to fill it out.  It turns out that
+      iconv, given a character count of 32, converts all 32, including the
+      embedded NUL.  So, that case works even through strlen(initial) is
+      smaller than the length of initial, which is the same as capacity.
+  */
+
+  static const char *fromcode = codeset.source_encodings[0].name;
   static const size_t noconv = size_t(-1);
-  static std::map<std::string, iconv_t> tocodes;
+  static std::unordered_map<std::string, iconv_t> tocodes;
 
   if( ! codeset.valid() ) {
     dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
@@ -3769,20 +3861,33 @@ cbl_field_t::internalize() {
     assert(0 == strlen(data.initial));
     return data.initial;
   }
-  if( holds_ascii() && is_ascii() ) return data.initial;
+  if( holds_ascii() && is_ascii() ) {
+    if( type != FldNumericEdited ) {
+      if( ! data.initial_within_capacity() ) {
+        ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u",
+                    cbl_field_t::level_str(level), name, data.initial,
+                    strlen(data.initial), data.capacity );
+      }
+    }
+    return data.initial;
+  }
   assert(data.capacity > 0);
 
   // The final 2 bytes of the output are "!\0".  It's a debugging sentinel.
   size_t n;
   size_t inbytesleft = data.capacity;
   size_t outbytesleft = inbytesleft;
-  char *in  = const_cast<char*>(data.initial);
-  char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
   if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
     inbytesleft = strlen(data.initial);
   }
+  if( type == FldNumericEdited ) {
+    outbytesleft = inbytesleft;
+  }
   const unsigned int in_len = inbytesleft;
 
+  char *in  = const_cast<char*>(data.initial);
+  char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
+
   assert(fromcode != tocode);
 
   /*
@@ -3799,8 +3904,9 @@ cbl_field_t::internalize() {
 
   do {
     if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
-      if( fromcode == os_locale.assumed ) {
-        fromcode = standard_internal.name;
+      if( fromcode == codeset.source_encodings[0].name ) {
+        codeset.source_encoding = &codeset.source_encodings[1];
+        fromcode = codeset.source_encoding->name;
         tocodes.clear();
         cd = tocodes[toname] = iconv_open(tocode, fromcode);
         dbgmsg("%s: trying input encoding %s", __func__, fromcode);
@@ -3813,7 +3919,7 @@ cbl_field_t::internalize() {
 
   if( n == noconv ) {
     size_t i = in_len - inbytesleft;
-    yywarn("failed to encode %s %qs as %s (%zu of %u bytes left)",
+    yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)",
            fromcode, data.initial + i, tocode, inbytesleft, in_len);
     if( false ) return NULL;
     return data.initial;
@@ -3821,7 +3927,7 @@ cbl_field_t::internalize() {
 
   if( 0 < inbytesleft ) {
     // data.capacity + inbytesleft is not correct if the remaining portion has
-    // multibyte characters.  But the fact reamins that the VALUE is too big.
+    // multibyte characters.  But the fact remains that the VALUE is too big.
     ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u",
             cbl_field_t::level_str(level), name, data.initial,
             data.capacity + inbytesleft, data.capacity );
@@ -3829,7 +3935,7 @@ cbl_field_t::internalize() {
 
   // Replace data.initial only if iconv output differs.
   if( 0 != memcmp(data.initial, output, out - output) ) {
-    assert(out <= output + data.capacity);
+    assert(out <= output + data.capacity || type == FldNumericEdited);
     dbgmsg("%s: converted '%.*s' to %s",
                         __func__, data.capacity, data.initial, tocode);
     struct localspace_t {
@@ -3858,14 +3964,16 @@ cbl_field_t::internalize() {
       data.capacity = out - output; // trailing '!' will be overwritten
     }
     // Pad with trailing blanks, tacking a '!' on the end.
-    for( const char *eout = output + data.capacity; 
+    for( const char *eout = output + data.capacity;
          out < eout;
          out += spc.len ) {
       memcpy(out, spc.space, spc.len);
     }
-    out[0] = '!';
+    // Numeric literal strings may have leading zeros, making their length
+    // longer than their capacity.
+    out[0] = type == FldLiteralN? '\0' : '!';
     assert(out[1] == '\0');
-    free(const_cast<char*>(data.initial));
+    data.orig = data.initial;
     data.initial = output;
   } else {
     free(output);
index 66fb2fd912ff175a9fe1f3728843a1801b09359d..6d29d060a0517544aebf3a26f6ee7f6c11886b83 100644 (file)
@@ -224,6 +224,7 @@ enum symbol_type_t {
   SymAlphabet,
   SymFile,
   SymDataSection,
+  SymLocale, 
 };
 
 // The ISO specification says alphanumeric literals have a maximum length of
@@ -237,7 +238,7 @@ struct cbl_field_data_t {
   uint32_t capacity,            // allocated space
            digits;              // magnitude: total digits (or characters)
   int32_t  rdigits;             // digits to the right
-  const char *initial, *picture;
+  const char *orig, *initial, *picture;
 
   enum etc_type_t { val88_e, upsi_e, value_e } etc_type;
   const char *
@@ -268,6 +269,7 @@ struct cbl_field_data_t {
     , capacity(0)
     , digits(0)
     , rdigits(0)
+    , orig(0)
     , initial(0)
     , picture(0)
     , etc_type(value_e)
@@ -279,6 +281,7 @@ struct cbl_field_data_t {
     , capacity(capacity)
     , digits(0)
     , rdigits(0)
+    , orig(0)
     , initial(0)
     , picture(0)
     , etc_type(value_e)
@@ -293,6 +296,7 @@ struct cbl_field_data_t {
     , capacity(capacity)
     , digits(digits)
     , rdigits(rdigits)
+    , orig(0)
     , initial(initial)
     , picture(picture)
     , etc_type(value_e)
@@ -387,6 +391,12 @@ struct cbl_field_data_t {
     return valify();
   }
 
+  bool initial_within_capacity() const {
+    return initial[capacity] == '\0'
+      ||   initial[capacity] == '!';
+  }
+  const char *original() const { return orig? orig : initial; }
+
  protected:
   cbl_field_data_t& copy_self( const cbl_field_data_t& that ) {
     memsize = that.memsize;
@@ -531,7 +541,7 @@ struct cbl_field_t {
   uint32_t level;
   cbl_occurs_t occurs;
   struct codeset_t {
-    static const encodings_t standard_internal;
+    static const encodings_t standard_internal, source_encodings[2], *source_encoding;
     cbl_encoding_t encoding;
     size_t alphabet;  // unlikely
     explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e,
@@ -544,22 +554,26 @@ struct cbl_field_t {
         ||
         (alphabet != 0 && encoding == custom_encoding_e);
     }
+    bool consistent() const {
+      return valid() && ( encoding == current_encoding('A')
+                          ||
+                          encoding == current_encoding('N')
+                          ||
+                          encoding == UTF8_e );
+    }
     bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) {
-      assert(encoding <= iconv_YU_e);
+      assert(valid_encoding(encoding));
       if( ! valid() ) { // setting first time
         this->encoding = encoding;
         this->alphabet = alphabet;
         return valid();
       }
-      // DUBNER override.  Encoding has to change when
-      //  01 FOO VALUE ZERO.  Just 0 is okay; ZERO is not.
-          this->encoding = encoding;
       return this->encoding == encoding && this->alphabet == alphabet;
     }
     bool set( const char picture_fragment[] = nullptr) {
       if( ! picture_fragment ) {
-        cbl_encoding_t currenc = current_encoding('A');
-        bool retval = set(currenc);
+        cbl_encoding_t enc = current_encoding('A');
+        bool retval = set(enc);
         return retval;
       }
       size_t len = strlen(picture_fragment);
@@ -568,14 +582,15 @@ struct cbl_field_t {
                      frag.begin(), ftoupper);
       switch(frag[0]) { 
       case 'A': case 'X': case '9':
-        return set(current_encoding('A'));
+        return set(current_encoding(display_encoding_e));
       case 'N': case 'U': 
         if( std::all_of(frag.begin(), frag.end(),
                         [first = frag[0]]( char ch ) {
                           return first == ch;
                         } ) ) {
           // All N's indicates National; all U's indicates UTF-8.
-          auto enc = frag[0] == 'N'? current_encoding('N') : UTF8_e;
+          auto enc = frag[0] == 'N' ? current_encoding(national_encoding_e)
+                                    : UTF8_e;
           return set(enc);
         }
         return false; // They all must be the same. 
@@ -739,7 +754,7 @@ struct cbl_field_t {
   uint32_t size() const; // table capacity or capacity
 
   const char * pretty_name() const {
-    if( name[0] == '_' && data.initial ) return data.initial;
+    if( name[0] == '_' && data.original() ) return data.original();
     return name;
   }
   static const char * level_str(uint32_t level );
@@ -1185,6 +1200,13 @@ struct cbl_arith_error_t {
     cbl_label_addresses_t bottom;
 };
 
+struct cbl_delete_file_t {
+    cbl_label_addresses_t over;
+    cbl_label_addresses_t exception;
+    cbl_label_addresses_t no_exception;
+    cbl_label_addresses_t bottom;
+};
+
 struct cbl_compute_error_t {
     // This is an int.  The value is a cbl_compute_error_code_t
     tree compute_error_code;
@@ -1232,7 +1254,10 @@ struct cbl_label_t {
     
     // for parse_xml processing:
     struct cbl_xml_parse_t *xml_parse;
-    
+
+    // For parser_file_delete_file
+    struct cbl_delete_file_t *delete_file;
+
     } structs;
 
   bool is_function() const { return type == LblFunction; }
@@ -1525,6 +1550,19 @@ struct cbl_section_t {
   }
 };
 
+struct cbl_locale_t {
+  cbl_name_t name;
+  cbl_encoding_t encoding;
+  cbl_name_t collation;
+
+  explicit cbl_locale_t(const cbl_name_t name,
+                        const char iconv_name[] = nullptr );
+
+  bool operator<( const cbl_locale_t& that ) const {
+    return strcmp(name, that.name) < 0;
+  }
+};
+
 struct cbl_special_name_t {
   int token;
   enum special_name_t id;
@@ -1536,22 +1574,35 @@ struct cbl_special_name_t {
 char * hex_decode( const char text[] );
 
 /*
- * For a custom alphabet of single-byte encoding, cbl_alphabet_t::alphabet
+ * An alphabet may just name an encoding, which implies binary collation.
+ *
+ * An alphabet may reference a Special-Names LOCALE, which defines an encoding
+ * and a collation (perhaps by default).
+ * 
+ * During Special-Names parsing, an Alphabet may reference an as-yet undefined
+ * LOCALE with an as-yet unknown encoding. As a placeholder it inserts a named,
+ * undefined cbl_locale_t symbol, which the Alphabet references.  If that
+ * locale is never defined, the encoding remains unknown, resulting in an error
+ * diagnostic at the end of Special-Names.
+ *
+ * For a custom alphabet of single-byte encoding, cbl_alphabet_t::collation_sequence
  * holds the collation position of each encoded value.  
- * If 'A' sorts first (after LOW-VALUE), then alphabet['A'] == 1. 
- * If the encoding is ASCII,         then 'A' is  65 and alphabet[ 65] == 1.
- * If the encoding is EBCDIC CP1140, then 'A' is 193 and alphabet[193] == 1.
+ * If 'A' sorts first (after LOW-VALUE), then collation_sequence['A'] == 1. 
+ * If the encoding is ASCII,         then 'A' is  65 and collation_sequence[ 65] == 1.
+ * If the encoding is EBCDIC CP1140, then 'A' is 193 and collation_sequence[193] == 1.
  */
 struct cbl_alphabet_t {
   YYLTYPE loc;
   cbl_name_t name;
   cbl_encoding_t encoding;
-  unsigned char low_index, high_index, last_index, alphabet[256];
+  size_t locale;  // index to cbl_locale_t symbol
+  unsigned char low_index, high_index, last_index, collation_sequence[256];
   unsigned char low_char, high_char;
 
   cbl_alphabet_t()
     : loc { 1,1, 1,1 }
     , encoding(ASCII_e)
+    , locale(0)
     , low_index(0)
     , high_index(255)
     , last_index(0)
@@ -1559,12 +1610,13 @@ struct cbl_alphabet_t {
     , high_char(0)
   {
     memset(name, '\0', sizeof(name));
-    memset(alphabet, 0xFF, sizeof(alphabet));
+    memset(collation_sequence, 0xFF, sizeof(collation_sequence));
   }
 
   cbl_alphabet_t(const YYLTYPE& loc, cbl_encoding_t enc)
     : loc(loc)
     , encoding(enc)
+    , locale(0)
     , low_index(0)
     , high_index(255)
     , last_index(0)
@@ -1572,14 +1624,17 @@ struct cbl_alphabet_t {
     , high_char(0)
   {
     memset(name, '\0', sizeof(name));
-    memset(alphabet, 0xFF, sizeof(alphabet));
+    memset(collation_sequence, 0xFF, sizeof(collation_sequence));
   }
 
+  cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name );
+
   cbl_alphabet_t( const YYLTYPE& loc, const cbl_name_t name,
                   unsigned char low_index, unsigned char high_index,
-                  unsigned char alphabet[] )
+                  unsigned char collation_sequence[] )
     : loc(loc)
     , encoding(custom_encoding_e)
+    , locale(0)
     , low_index(low_index), high_index(high_index)
     , last_index(high_index)
     , low_char(low_index)
@@ -1587,21 +1642,23 @@ struct cbl_alphabet_t {
   {
     assert(strlen(name) < sizeof(this->name));
     strcpy(this->name, name);
-    std::copy(alphabet, alphabet + sizeof(this->alphabet), this->alphabet);
+    std::copy(collation_sequence,
+              collation_sequence + sizeof(this->collation_sequence),
+              this->collation_sequence);
   }
 
   unsigned char low_value() const {
-    return alphabet[low_index];
+    return collation_sequence[low_index];
   }
   unsigned char high_value() const {
-    return alphabet[high_index];
+    return collation_sequence[high_index];
   }
 
   void
   add_sequence( const YYLTYPE& loc, const unsigned char seq[] ) {
     if( low_index == 0 ) low_index = seq[0];
 
-    unsigned char last = last_index > 0? alphabet[last_index] + 1 : 0;
+    unsigned char last = last_index > 0? collation_sequence[last_index] + 1 : 0;
 
     for( const unsigned char *p = seq; !end_of_string(p); p++  ) {
       assign(loc, *p, last++);
@@ -1612,7 +1669,7 @@ struct cbl_alphabet_t {
   add_interval( const YYLTYPE& loc, unsigned char low, unsigned char high ) {
     if( low_index == 0 ) low_index = low;
 
-    unsigned char last = alphabet[last_index];
+    unsigned char last = collation_sequence[last_index];
 
     for( unsigned char ch = low; ch < high; ch++  ) {
       assign(loc, ch, last++);
@@ -1649,8 +1706,11 @@ struct cbl_alphabet_t {
                "  0   1   2   3   4   5   6   7"
               "   8   9   A   B   C   C   E   F");
       unsigned int row = 0;
-      for( auto p = alphabet; p < alphabet + sizeof(alphabet); p++ ) {
-        if( (p - alphabet) % 16 == 0 ) fprintf(stderr, "\n%4X\t", row++);
+      for( auto p = collation_sequence;
+           p < collation_sequence + sizeof(collation_sequence); p++ ) {
+        if( (p - collation_sequence) % 16 == 0 ) {
+          fprintf(stderr, "\n%4X\t", row++);
+        }
         fprintf(stderr, "%3u ", *p);
       }
       fprintf(stderr, "\n");
@@ -1870,6 +1930,7 @@ struct symbol_elem_t {
     cbl_field_t        field;
     cbl_label_t        label;
     cbl_special_name_t special;
+    cbl_locale_t       locale;
     cbl_alphabet_t     alphabet;
     cbl_file_t         file;
     cbl_section_t      section;
@@ -1927,6 +1988,9 @@ struct symbol_elem_t {
     case SymSpecial:
       elem.special = that.elem.special;
       break;
+    case SymLocale:
+      elem.locale = that.elem.locale;
+      break;
     case SymAlphabet:
       elem.alphabet = that.elem.alphabet;
       break;
@@ -2092,6 +2156,18 @@ cbl_special_name_of( symbol_elem_t *e ) {
   return &e->elem.special;
 }
 
+static inline cbl_locale_t *
+cbl_locale_of( symbol_elem_t *e ) {
+  assert(e && e->type == SymLocale);
+  return &e->elem.locale;
+}
+
+static inline const cbl_locale_t *
+cbl_locale_of( const symbol_elem_t *e ) {
+  assert(e && e->type == SymLocale);
+  return &e->elem.locale;
+}
+
 static inline cbl_alphabet_t *
 cbl_alphabet_of( symbol_elem_t *e ) {
   assert(e && e->type == SymAlphabet);
@@ -2104,6 +2180,7 @@ cbl_alphabet_of( const symbol_elem_t *e ) {
   return &e->elem.alphabet;
 }
 
+
 static inline cbl_file_t *
 cbl_file_of( symbol_elem_t *e ) {
   assert(e && e->type == SymFile);
@@ -2477,6 +2554,7 @@ struct symbol_elem_t * symbol_literalA( size_t program, const char name[] );
 
 struct cbl_special_name_t * symbol_special( special_name_t id );
 struct symbol_elem_t * symbol_special( size_t program, const char name[] );
+struct symbol_elem_t * symbol_locale( size_t program, const char name[] );
 struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] );
 
 struct symbol_elem_t * symbol_file( size_t program, const char name[] );
@@ -2524,6 +2602,7 @@ cbl_label_t *   symbol_label_add( size_t program,
 cbl_label_t *   symbol_program_add( size_t program, cbl_label_t *input );
 symbol_elem_t * symbol_special_add( size_t program,
                                    cbl_special_name_t *special );
+symbol_elem_t * symbol_locale_add( size_t program, const cbl_locale_t *locale );
 symbol_elem_t * symbol_alphabet_add( size_t program,
                                     const cbl_alphabet_t *alphabet );
 symbol_elem_t * symbol_file_add( size_t program,
@@ -2548,8 +2627,8 @@ static inline size_t upsi_register() {
   return symbol_index(symbol_field(0,0,"UPSI-0"));
 }
 
-void wsclear( char ch);
-const char *wsclear();
+void wsclear( uint32_t ch);
+const uint32_t *wsclear();
 
 enum cbl_call_convention_t {
   cbl_call_verbatim_e = 'V',
index 96159876f14c5532a73ef223861b3ad57ba57643..0e6ec8cfb246c9229d99a30bcf68961a64fd9cd8 100644 (file)
@@ -271,6 +271,8 @@ symbol_type_str( enum symbol_type_t type )
         return "SymLabel";
     case SymSpecial:
         return "SymSpecial";
+    case SymLocale:
+        return "SymLocale";
     case SymAlphabet:
         return "SymAlphabet";
     case SymFile:
@@ -1094,28 +1096,18 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
   if( has_attr(all_alpha_e) ) {
     bool alpha_value = fig != zero_value_e;
     
-    // In order to check for all alphabetic characters, we have to convert
-    // data.initial back to ASCII:
-
-    size_t outchars;
-    char *initial = __gg__iconverter(codeset.encoding,
-                                     DEFAULT_CHARMAP_SOURCE,
-                                     data.initial,
-                                     data.capacity,
-                                     &outchars);
-
     if( fig == normal_value_e ) {
-      alpha_value = std::all_of( initial,
-                                 initial +
-                                 data.capacity,
-                                 []( char ch ) {
-                                   return ISSPACE(ch) ||
-                                     ISPUNCT(ch) ||
-                                     ISALPHA(ch); } );
+      alpha_value = std::none_of( data.initial,
+                                  data.initial +
+                                  data.capacity,
+                                  []( char ch ) {
+                                    return 
+                                      ISPUNCT(ch) ||
+                                      ISDIGIT(ch); } );
     }
     if( ! alpha_value ) {
       error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data",
-               name, fig == zero_value_e? cbl_figconst_str(fig) : initial);
+               name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial);
     }
   }
 
@@ -1315,7 +1307,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
         size_t outcount;
         char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity));
         const char *in_asciip = __gg__iconverter( src->codeset.encoding,
-                                                  DEFAULT_CHARMAP_SOURCE,
+                                                  DEFAULT_SOURCE_ENCODING,
                                                   src->data.initial,
                                                   src->data.capacity,
                                                   &outcount );
@@ -2078,7 +2070,8 @@ cobol_lineno() {
 
 const char *
 cobol_filename() {
-  return input_filenames.empty()? input_filename_vestige : input_filenames.top().name;
+  return input_filenames.empty()?
+    input_filename_vestige : input_filenames.top().name;
 }
 
 void
index 349c669aa7ca3b508d9840ed436320c092bfa614..d4084654319850293cd3e92719d0fb43a7f17eca 100644 (file)
@@ -1376,7 +1376,7 @@ static encodings_t encodings[] = {
   { false, iconv_UTF_7_e, "UTF-7" },
   // Is UTF-8 supported??  "supported" means "recognized by parser_alphabet",
   // but UTF-8 is not a valid runtime encoding.
-  { false, iconv_UTF_8_e, "UTF-8" },  
+  { false, iconv_UTF_8_e, "UTF-8" },
   { false, iconv_UTF_16_e, "UTF-16" },
   { false, iconv_UTF_16BE_e, "UTF-16BE" },
   { false, iconv_UTF_16LE_e, "UTF-16LE" },
@@ -1439,10 +1439,20 @@ cbl_encoding_t
 __gg__encoding_iconv_type( const char *name ) {
   static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
 
+  char *slashless = strdup(name);
+  assert(slashless);
+  char *pslash = strchr(slashless, '/');
+  if( pslash )
+    {
+    *pslash = '\0';
+    }
+
   auto p = std::find_if( encodings, eoencodings,
-                         [name]( const encodings_t& elem ) {
-                           return strcmp(name, elem.name) == 0;
+                         [slashless]( const encodings_t& elem ) {
+                           return strcasecmp(slashless, elem.name) == 0;
                          } );
+  free(slashless);
+
   return p < eoencodings? p->type : no_encoding_e;
 }
 
@@ -1557,7 +1567,7 @@ __gg__get_charmap(cbl_encoding_t encoding)
 
   if( encoding == custom_encoding_e)
     {
-    encoding = DEFAULT_CHARMAP_SOURCE;
+    encoding = DEFAULT_SOURCE_ENCODING;
     }
 
   charmap_t *retval;
index f35d033f910f030f0bd7dd1e2a3d64843d72070c..f48c063e60ba4c36eaa00369e39f48a5026343ac 100644 (file)
@@ -228,13 +228,17 @@ char * __gg__iconverter(cbl_encoding_t from,
                         size_t length,
                         size_t *outlength);
 
-#define DEFAULT_CHARMAP_SOURCE (iconv_CP1252_e)
+#define DEFAULT_SOURCE_ENCODING (iconv_CP1252_e)
 
 class charmap_t
   {
   private:
     // This is the encoding of this character map
     cbl_encoding_t m_encoding;
+    bool m_is_valid;
+    bool m_is_big_endian;
+    bool m_has_bom = false;
+    int  m_stride; // Number of bytes between one character and the next
 
     enum
       {
@@ -246,32 +250,114 @@ class charmap_t
     // need be called but once for each ASCII value.
     std::unordered_map<int, int>m_map_of_encodings;
 
-    void determine_sign_type()
+  public:
+    explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
       {
-      if( mapped_character(ascii_0) & 0x80 )
+      // We are constructing a new charmap_t from an arbitrary encoding.  We
+      // need to figure out how wide it is, its endianness, whether or not
+      // it is EBCDIC-based, and so on.
+
+      // We do that by converting "0" to the target encoding, and we analyze
+      // what we get back.
+      
+      size_t outlength = 0;
+      const char challenge[] = "0";
+      const unsigned char *response = PTRCAST(unsigned char,
+                                   __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                                    m_encoding,
+                                                    challenge,
+                                                    1,
+                                                    &outlength));
+      unsigned char char_0 = 0x00;
+
+      m_is_valid = false;
+      m_has_bom  = false;
+      m_is_big_endian = false;
+
+      if( outlength == 1 )
         {
-        m_numeric_sign_type = sign_type_ebcdic;
+        m_stride = 1;
+        // This is our happy place:  A single-byte encoded character set.
+        char_0 = response[0];
         }
-      else
+      else if( outlength == 2 )
+        {
+        m_stride = 2;
+        if( response[0] )
+          {
+          char_0 = response[0];
+          }
+        else if( response[1] )
+          {
+          m_is_big_endian = true;
+          char_0 = response[1];
+          }
+        }
+      else if( outlength == 4 )
         {
+        // Check for the Byte Order Mark (BOM)
+        if( response[0] == 0xFF && response[1] == 0xFE )
+          {
+          m_stride = 2;
+          m_has_bom = true;
+          char_0 = response[2];
+          }
+        else if( response[0] == 0xFE && response[1] == 0xFF )
+          {
+          m_stride = 2;
+          m_has_bom = true;
+          m_is_big_endian = true;
+          char_0 = response[3];
+          }
+        else if( response[0] )
+          {
+          m_stride = 4;
+          char_0 = response[0];
+          }
+        else
+          {
+          m_stride = 4;
+          m_is_big_endian = true;
+          char_0 = response[3];
+          }
+        }
+      else if( outlength == 8 )
+        {
+        m_stride = 4;
+        if( response[0] == 0xFF && response[1] == 0xFE )
+          {
+          char_0 = response[4];
+          }
+        else if( response[0] == 0xFE && response[1] == 0xFF )
+          {
+          m_is_big_endian = true;
+          char_0 = response[7];
+          }
+        }
+
+      // With everything else established, we now check the zero character.
+      // We know about only 0x30 for ASCII and 0xF0 for EBCDIC.
+      if( char_0 == 0x30 )
+        {
+        m_is_valid = true;
         m_numeric_sign_type = sign_type_ascii;
         }
+      else if( char_0 == 0xF0 )
+        {
+        m_is_valid = true;
+        m_numeric_sign_type = sign_type_ebcdic;
+        }
       }
 
-  public:
-    explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
-      {
-      determine_sign_type();
-      }
-    explicit charmap_t(uint16_t e) : m_encoding(static_cast<cbl_encoding_t>(e))
-      {
-      determine_sign_type();
-      }
+    bool is_valid()      const{return m_is_valid     ;}
+    bool is_big_endian() const{return m_is_big_endian;}
+    bool has_bom()       const{return m_has_bom      ;}
+    int  stride()        const{return m_stride       ;}
 
     int mapped_character(int ch)
       {
       // The assumption is that anybody calling this routine is providing
-      // a single-byte character in the DEFAULT_CHARMAP_SOURCE encoding.  We
+      // a single-byte character in the DEFAULT_SOURCE_ENCODING encoding.  We
       // return the equivalent character in the m_encoding
       int retval;
       std::unordered_map<int, int>::const_iterator it =
@@ -284,7 +370,7 @@ class charmap_t
         {
         retval = 0;
         size_t outlength = 0;
-        const char *mapped = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
+        const char *mapped = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
                                               m_encoding,
                                               PTRCAST(char, &ch),
                                               1,
index 51cc6c39287398709135641db8b9e9c2dd3dd6bc..37bcde305fc3332577fd076f07ed6bcaf7a32d3b 100644 (file)
@@ -1195,6 +1195,11 @@ enum cbl_encoding_t {
   iconv_YU_e,
 };
 
+static inline bool
+valid_encoding( cbl_encoding_t enc ) {
+  return enc <= iconv_YU_e;
+}
+
 #define ASCII_e  iconv_ASCII_e   
 #define CP1252_e iconv_CP1252_e
 #define EBCDIC_e iconv_CP1140_e
index 7a1c9ac07023e451df4c4ee94a395a53ec6c605d..14ef069ccb7339ef7e52de825f902e7a793d2e90 100644 (file)
@@ -81,6 +81,7 @@ enum cblc_file_prior_op_t
   file_op_rewrite,
   file_op_delete,
   file_op_close,
+  file_op_remove,
   };
 
 /* end implementation details */
index 16d75b0f186411b6795925f19861d9b74751277d..7c01f39a8c066fb018a1c46b4baba69a9a9becbe 100644 (file)
@@ -191,9 +191,10 @@ handle_errno(cblc_file_t *file, const char *function, const char *msg)
 
 static
 char *
-get_filename( const cblc_file_t *file,
-              int is_quoted)
+get_filename( const cblc_file_t *file)
   {
+  bool is_quoted = !!(file->flags & file_name_quoted_e);
+
   static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
   static char *fname = static_cast<char *>(malloc(MINIMUM_ALLOCATION_SIZE));
   massert(fname);
@@ -1151,6 +1152,80 @@ __io__file_delete(cblc_file_t *file, bool is_random)
     }
   }
 
+static void
+__io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
+  {
+  // filename is the result of a strdup or malloc.  Because both FILE OPEN
+  // and FILE DELETE can establish or change a name, we free it here and
+  // replace it.  The same is true in FILE DELETE Format 2
+  free(file->filename);
+  file->filename = filename;
+  file->flags &= ~file_name_quoted_e;
+  file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
+  int erc;
+
+  // This code copied from reopen
+  const char *trimmed_name = get_filename(file);
+  if( !trimmed_name[0] )
+    {
+    bool all_spaces = true;
+    for(size_t i=0; i<strlen(file->filename); i++)
+      {
+      if( file->filename[i] != ascii_space )
+        {
+        all_spaces = false;
+        }
+      break;
+      }
+    if( all_spaces )
+      {
+      warnx("Warning: %s specified with a filename that is all spaces",
+            file->name);
+      file->io_status = FsNameError;    // "31"
+      goto done;
+      }
+
+    warnx(  "%s(): There is no environment variable named \"%s\"\n",
+            __func__,
+            file->filename);
+    file->io_status = FsNoFile;    // "35"
+    goto done;
+    }
+  // trimmed_name is now the file system name of the file to be removed.
+
+  // If the file is open, we flag that with "41"
+  if( file->file_pointer )
+    {
+    file->io_status = FsIsOpen;    // "41"
+    goto done;
+    }
+
+  // There's been a lot of buildup.  We can now try to remove the file:
+  errno = 0;
+  erc = remove(trimmed_name);
+  if( erc == 0 )
+    {
+    // All is copacetic.  There was a file, and now it's gone.
+    file->io_status = FsSuccess;    // "00"
+    }
+  else if( errno == ENOENT )
+    {
+    // The file didn't exist.  
+    file->io_status = FsUnavail;    // "05"
+    }
+  else
+    {
+    // We have some other kind of error.  Lack of credentials, or whatever.
+    file->io_status = FsErrno;    // 
+    goto done;
+    }
+
+  file->prior_op = file_op_remove;
+  done:
+  file->errnum = errno;
+  establish_status(file, -1);
+  }
+
 static void
 indexed_file_start( cblc_file_t *file,
                     int relop,
@@ -4115,7 +4190,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
   // Stash the mode_char for later analysis during READ and WRITE operations
   file->mode_char = mode_char;
   char *trimmed_name;
-  trimmed_name = get_filename(file, !!(file->flags & file_name_quoted_e));
+  trimmed_name = get_filename(file);
   if( !trimmed_name[0] )
     {
     bool all_spaces = true;
@@ -4353,8 +4428,10 @@ __io__file_open(cblc_file_t *file,
     }
   else
     {
-    // filename is the result of a strdup or malloc.  We will free() it at
-    // file close time.
+    // filename is the result of a strdup or malloc.  Because both FILE OPEN
+    // and FILE DELETE can establish or change a name, we free it here and
+    // replace it.  The same is true in FILE DELETE Format 2
+    free(file->filename);
     file->filename = filename;
     file->flags &= ~file_name_quoted_e;
     file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
@@ -4492,6 +4569,9 @@ public:
                             size_t length, bool is_random );
   typedef void (delete_t)( cblc_file_t *file,
                           bool is_random );
+  typedef void (remove_t)( cblc_file_t *file,
+                          char *filename,
+                          int is_quoted);
 
   open_t      *Open;
   close_t     *Close;
@@ -4500,6 +4580,7 @@ public:
   write_t     *Write;
   rewrite_t   *Rewrite;
   delete_t    *Delete;
+  remove_t    *Remove;
 
   gcobol_io_t()
     : Open(NULL)
@@ -4509,15 +4590,17 @@ public:
     , Write(NULL)
     , Rewrite(NULL)
     , Delete(NULL)
+    , Remove(NULL)
   {}
 
-  gcobol_io_t(  open_t      *Open,
+  gcobol_io_t(   open_t      *Open,
                  close_t     *Close,
                  start_t     *Start,
                  read_t      *Read,
                  write_t     *Write,
                  rewrite_t   *Rewrite,
-                 delete_t    *Delete )
+                 delete_t    *Delete,
+                 remove_t    *Remove)
     : Open(Open)
     , Close(Close)
     , Start(Start)
@@ -4525,6 +4608,7 @@ public:
     , Write(Write)
     , Rewrite(Rewrite)
     , Delete(Delete)
+    , Remove(Remove)
   {}
 
 #if FILE_IO_IMPLEMENTED
@@ -4552,7 +4636,8 @@ gcobol_fileops() {
                           __io__file_read,
                           __io__file_write,
                           __io__file_rewrite,
-                          __io__file_delete );
+                          __io__file_delete,
+                          __io__file_remove);
 }
 
 /*
@@ -4657,9 +4742,19 @@ extern "C"
 void
 __gg__file_delete(cblc_file_t *file, bool is_random)
   {
+    // DELETE FILE Format 1 - deletes a record.
     gcobol_io_t *functions = gcobol_io_funcs();
     functions->Delete(file, is_random);
   }
+extern "C"
+
+void
+__gg__file_remove(cblc_file_t *file, char *name, int is_quoted)
+  {
+    // DELETE FILE Format 2 - removes a file.
+    gcobol_io_t *functions = gcobol_io_funcs();
+    functions->Remove(file, name, is_quoted);
+  }
 
 /* end interface functions */
 
index 49dee6e3aef4040358f11b48a528fc6b58217b4f..bb03f629d0a95aea3292ee10e38550c8e84c67c2 100644 (file)
@@ -55,7 +55,6 @@
 #include "libgcobol.h"
 #include "charmaps.h"
 
-
 #pragma GCC diagnostic ignored "-Wformat-truncation"
 
 #define JD_OF_1601_01_02 2305812.5
@@ -576,7 +575,7 @@ get_all_time( const cblc_field_t *dest, // needed for the target encoding
           ctm.day_of_year,
           ctm.ZZZZ);
   __gg__convert_encoding(PTRCAST(char, stime),
-                         DEFAULT_CHARMAP_SOURCE,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
   }
 
@@ -810,7 +809,6 @@ ftime_replace(char *dest,
   const char *src;
   bool saw_decimal_point = false;
   bool saw_plus_sign = false;
-  char decimal_point = __gg__get_decimal_point();
   static const int OFFSET_TO_YYYY           =  0;
   static const int OFFSET_TO_MM             =  4;
   static const int OFFSET_TO_DD             =  6;
@@ -826,18 +824,20 @@ ftime_replace(char *dest,
   static const int OFFSET_TO_DOY            = 34;
   static const int OFFSET_TO_ZZZZ           = 37;
 
-  int source_Y    = charmap_source->mapped_character(ascii_Y   );
-  int source_W    = charmap_source->mapped_character(ascii_W   );
-  int source_s    = charmap_source->mapped_character(ascii_s   );
-  int source_m    = charmap_source->mapped_character(ascii_m   );
-  int source_h    = charmap_source->mapped_character(ascii_h   );
-  int source_plus = charmap_source->mapped_character(ascii_plus);
-  int source_D    = charmap_source->mapped_character(ascii_D   );
-  int source_M    = charmap_source->mapped_character(ascii_M   );
+  unsigned int decimal_point =
+                   charmap_source->mapped_character(__gg__get_decimal_point());
+  unsigned int source_Y    = charmap_source->mapped_character(ascii_Y   );
+  unsigned int source_W    = charmap_source->mapped_character(ascii_W   );
+  unsigned int source_s    = charmap_source->mapped_character(ascii_s   );
+  unsigned int source_m    = charmap_source->mapped_character(ascii_m   );
+  unsigned int source_h    = charmap_source->mapped_character(ascii_h   );
+  unsigned int source_plus = charmap_source->mapped_character(ascii_plus);
+  unsigned int source_D    = charmap_source->mapped_character(ascii_D   );
+  unsigned int source_M    = charmap_source->mapped_character(ascii_M   );
 
   while( source < source_end && dest < dest_end )
     {
-    char fchar = *source;
+    unsigned char fchar = *source;
     if( fchar == source_Y )
       {
       // This can only be a YYYY
@@ -847,7 +847,7 @@ ftime_replace(char *dest,
       const char *p = source;
       while(p < source_end)
         {
-        if( *p++ == source_W )
+        if( (unsigned char)*p++ == source_W )
           {
           src = ftime + OFFSET_TO_ZZZZ;
           }
@@ -864,12 +864,12 @@ ftime_replace(char *dest,
     else if( fchar == source_D )
       {
       // It can be a D, DD or DDD
-      if( source[2] == source_D )
+      if( (unsigned char)source[2] == source_D )
         {
         ncount = 3;
         src = ftime + OFFSET_TO_DOY;
         }
-      else if( source[1] == source_D )
+      else if( (unsigned char)source[1] == source_D )
         {
         ncount = 2;
         src = ftime + OFFSET_TO_DD;
@@ -946,7 +946,7 @@ ftime_replace(char *dest,
       {
       // This indicates special processing for a variable number of 's'
       // characters
-      while(*source == 's' && dest < dest_end)
+      while((unsigned char)*source == source_s && dest < dest_end)
         {
         source += 1;
         *dest++ = *src++;
@@ -1279,7 +1279,7 @@ __gg__current_date(cblc_field_t *dest)
   char retval[DATE_STRING_BUFFER_SIZE];
   timespec_to_string(retval, tp);
   __gg__convert_encoding(PTRCAST(char, retval),
-                         DEFAULT_CHARMAP_SOURCE,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
   string_to_dest(dest, retval);
   }
@@ -2022,7 +2022,6 @@ __gg__max(cblc_field_t *dest,
         }
       }
 
-
     __gg__adjust_dest_size(dest, best_length);
     dest->type = FldAlphanumeric;
     assert(best_location);
@@ -2088,12 +2087,12 @@ __gg__lower_case( cblc_field_t *dest,
   __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                 length,
                                 from,
-                                DEFAULT_CHARMAP_SOURCE);
+                                DEFAULT_SOURCE_ENCODING);
   std::transform(dest->data, dest->data + dest_length, dest->data,
                  [](unsigned char c) { return std::tolower(c); });
   __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                 length,
-                                DEFAULT_CHARMAP_SOURCE,
+                                DEFAULT_SOURCE_ENCODING,
                                 to);
   }
 
@@ -2391,25 +2390,23 @@ numval( cblc_field_t *dest,
         size_t input_offset,
         size_t input_size)
   {
-  // Returns the one-based character position of a bad character
-  // returns zero if it is okay
+  // Returns the one-based character position of a bad character.
+  // Returns zero if it is okay.
+
+  // This routine works in ASCII space:
 
-  const char *p    = PTRCAST(char, (input->data + input_offset));
-  const char *pend =     p + input_size;
+  size_t nbytes;
+  const char *p = __gg__iconverter(input->encoding,
+                                   DEFAULT_SOURCE_ENCODING,
+                                   PTRCAST(char, input->data + input_offset),
+                                   input_size,
+                                   &nbytes);
+  const char *pend = p + input_size;
 
   int errpos = 0;
   __int128 retval = 0;
   int retval_rdigits = 0;
 
-  charmap_t *charmap = __gg__get_charmap(input->encoding);
-  unsigned char decimal_point
-                   = charmap->mapped_character(__gg__get_decimal_point());
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
-  int mapped_space = charmap->mapped_character(ascii_space);
-  int mapped_plus = charmap->mapped_character(ascii_plus);
-  int mapped_minus = charmap->mapped_character(ascii_minus);
-
   bool saw_digit= false;
   bool in_fraction  = false;
   bool leading_sign = false;
@@ -2437,31 +2434,31 @@ numval( cblc_field_t *dest,
       case SPACE1:
         // We tolerate spaces, and expect to end with a sign, digit,
         // or decimal point:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           continue;
           }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
           {
           leading_sign = true;
           state = SPACE2;
           break;
           }
-        if( ch == mapped_minus )
+        if( ch == ascii_minus )
           {
           leading_sign = true;
           is_negative  = true;
           state = SPACE2;
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           saw_digit = true;
           retval = ch & 0xF;
           state = DIGITS;
           break;
           }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
           {
           in_fraction = true;
           state = DIGITS;
@@ -2473,18 +2470,18 @@ numval( cblc_field_t *dest,
 
       case SPACE2:
         // We tolerate spaces, and expect to end with a digit or decimal point:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           saw_digit = true;
           retval = ch & 0xF;
           state = DIGITS;
           break;
           }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
           {
           in_fraction = true;
           state = DIGITS;
@@ -2499,7 +2496,7 @@ numval( cblc_field_t *dest,
         // end with a space, a sign, "DB" or "CR", or the the end of the string
         // It's a bit complicated
 
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           saw_digit = true;
           retval *= 10;
@@ -2510,43 +2507,43 @@ numval( cblc_field_t *dest,
             }
           break;
           }
-        if( ch == decimal_point && in_fraction )
+        if( ch == __gg__decimal_point && in_fraction )
           {
           // Only one decimal is allowed
           goto done;
           }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
           {
           in_fraction = true;
           break;
           }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           state = SPACE3;
           break;
           }
-        if( ch == mapped_plus && leading_sign)
+        if( ch == ascii_plus && leading_sign)
           {
           // We are allowed leading or trailing signs, but not both
           goto done;
           }
-        if( ch == mapped_minus && leading_sign)
+        if( ch == ascii_minus && leading_sign)
           {
           // We are allowed leading or trailing signs, but not both
           goto done;
           }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
           {
           state = SPACE4;
           break;
           }
-        if( ch == mapped_minus )
+        if( ch == ascii_minus )
           {
           is_negative = true;
           state = SPACE4;
           break;
           }
-        if( std::tolower(ch) == 'd' )
+        if( std::tolower(ch) == ascii_d )
           {
           if( leading_sign )
             {
@@ -2554,7 +2551,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || std::tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != ascii_b )
             {
             goto done;
             }
@@ -2562,7 +2559,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( std::tolower(ch) == 'c' )
+        if( std::tolower(ch) == ascii_c )
           {
           if( leading_sign )
             {
@@ -2570,7 +2567,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || std::tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != ascii_r )
             {
             goto done;
             }
@@ -2584,26 +2581,26 @@ numval( cblc_field_t *dest,
 
       case SPACE3:
         // We tolerate spaces, or we end with a sign:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
-        if( ch == mapped_plus && leading_sign)
+        if( ch == ascii_plus && leading_sign)
           {
           // We are allowed leading or trailing signs, but not both
           goto done;
           }
-        if( ch == mapped_minus && leading_sign)
+        if( ch == ascii_minus && leading_sign)
           {
           // We are allowed leading or trailing signs, but not both
           goto done;
           }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
           {
           state = SPACE4;
           break;
           }
-        if( ch == mapped_minus )
+        if( ch == ascii_minus )
           {
           is_negative = true;
           state = SPACE4;
@@ -2617,7 +2614,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || std::tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != ascii_b )
             {
             goto done;
             }
@@ -2633,7 +2630,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || std::tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != ascii_r )
             {
             goto done;
             }
@@ -2644,7 +2641,7 @@ numval( cblc_field_t *dest,
         goto done;
         break;
       case SPACE4:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
@@ -2658,7 +2655,7 @@ numval( cblc_field_t *dest,
     }
   else if( p == pend )
     {
-    // If we got to the end without seeing adigit, we need to bump the
+    // If we got to the end without seeing a digit, we need to bump the
     // error pointer:
     errpos += 1;
     }
@@ -2696,7 +2693,15 @@ numval_c( cblc_field_t *dest,
   {
   size_t errcode = 0;
 
-  char *pstart = PTRCAST(char, (src->data+src_offset));
+//  char *pstart = PTRCAST(char, (src->data+src_offset));
+  size_t nbytes;
+  const char *converted = __gg__iconverter(src->encoding,
+                                  DEFAULT_SOURCE_ENCODING,
+                                  PTRCAST(char, src->data+src_offset),
+                                  src_size,
+                                  &nbytes);
+  char *pstart = strdup(converted);
+  massert(pstart);
   char *pend   = pstart + src_size;
   char *p      = pstart;
 
@@ -2704,45 +2709,41 @@ numval_c( cblc_field_t *dest,
   int sign = 0;
   int rdigits = 0;
   int rdigit_bump = 0;
-  charmap_t *charmap = __gg__get_charmap(src->encoding);
-  unsigned char decimal_point
-                   = charmap->mapped_character(__gg__get_decimal_point());
-  unsigned char decimal_separator
-                   = charmap->mapped_character(__gg__get_decimal_separator());
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
-  int mapped_space = charmap->mapped_character(ascii_space);
-  int mapped_plus = charmap->mapped_character(ascii_plus);
-  int mapped_minus = charmap->mapped_character(ascii_minus);
-  int mapped_C = charmap->mapped_character(ascii_C);
-  int mapped_R = charmap->mapped_character(ascii_R);
-  int mapped_D = charmap->mapped_character(ascii_D);
-  int mapped_B = charmap->mapped_character(ascii_B);
-  int mapped_c = charmap->mapped_character(ascii_c);
-  int mapped_r = charmap->mapped_character(ascii_r);
-  int mapped_d = charmap->mapped_character(ascii_d);
-  int mapped_b = charmap->mapped_character(ascii_b);
+  unsigned char decimal_point = __gg__decimal_point;
+  unsigned char decimal_separator = __gg__decimal_separator;
+
+  char *currency_in_ascii;
 
   char *currency_start;
   char *currency_end;
   if( crcy )
     {
-    currency_start = PTRCAST(char, (crcy->data+crcy_offset));
-    currency_end   = currency_start + crcy_size;
+    converted = __gg__iconverter(crcy->encoding,
+                                 DEFAULT_SOURCE_ENCODING,
+                                 PTRCAST(char, crcy->data+crcy_offset),
+                                 crcy_size,
+                                 &nbytes);
+    currency_in_ascii = static_cast<char*>(malloc(nbytes+1));
+    massert(currency_in_ascii);
+    strcpy(currency_in_ascii, converted);
     }
   else
     {
-    currency_start = __gg__get_default_currency_string();
-    currency_end   = currency_start + strlen(currency_start);
+    // This is in ascii
+    currency_in_ascii = strdup(__gg__get_default_currency_string());
+    massert(currency_in_ascii);
     }
+  currency_start = currency_in_ascii;
+  currency_end   = currency_start + strlen(currency_start);
+
   char *pcurrency = currency_start;
   // Trim off spaces from the currency:
-  while( *pcurrency == mapped_space && pcurrency < currency_end )
+  while( *pcurrency == ascii_space && pcurrency < currency_end )
     {
     pcurrency += 1;
     }
 
-  while( *(currency_end-1) == mapped_space && currency_end > currency_start )
+  while( *(currency_end-1) == ascii_space && currency_end > currency_start )
     {
     currency_end -= 1;
     }
@@ -2769,12 +2770,12 @@ numval_c( cblc_field_t *dest,
       {
       case first_space   :
         // Eat up spaces, if any, and then dispatch on the first non-space:
-        if( ch != mapped_space )
+        if( ch != ascii_space )
           {
           // ch can now be a plus, a minus, a digit, or the first character
           // of the currency string
-          if(   ch == mapped_plus
-             || ch == mapped_minus )
+          if(   ch == ascii_plus
+             || ch == ascii_minus )
             {
             state = first_sign;
             // Decrement to pointer in order to pick up the character again
@@ -2785,7 +2786,7 @@ numval_c( cblc_field_t *dest,
             state = currency;
             p -= 1;
             }
-          else if(  (ch >= mapped_0 && ch <= mapped_9)
+          else if(  (ch >= ascii_0 && ch <= ascii_9)
                     || ch == decimal_point )
             {
             state = digits;
@@ -2805,7 +2806,7 @@ numval_c( cblc_field_t *dest,
 
       case first_sign    :
         // We know the character is a plus or a minus:
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
           {
           sign = 1;
           state = second_space;
@@ -2820,14 +2821,14 @@ numval_c( cblc_field_t *dest,
       case second_space :
         // Eat up spaces, if any.  This segment has to end with a currency or
         // a digit:
-        if( ch != mapped_space )
+        if( ch != ascii_space )
           {
           if( ch == *pcurrency )
             {
             state = currency;
             p -= 1;
             }
-          else if(  (ch >= mapped_0 && ch <= mapped_9)
+          else if(  (ch >= ascii_0 && ch <= ascii_9)
                     || ch == decimal_point )
             {
             state = digits;
@@ -2868,9 +2869,9 @@ numval_c( cblc_field_t *dest,
 
       case before_digits :
         // Eat up spaces, if any.  This segment has to end with a digit
-        if( ch != mapped_space )
+        if( ch != ascii_space )
           {
-          if(  (ch >= mapped_0 && ch <= mapped_9)
+          if(  (ch >= ascii_0 && ch <= ascii_9)
                || ch == decimal_point )
             {
             state = digits;
@@ -2890,7 +2891,7 @@ numval_c( cblc_field_t *dest,
       case digits     :
         // The only thing allowed here are digits, decimal points, and
         // decimal separators
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           // We have a digit.
           rdigits += rdigit_bump;
@@ -2923,14 +2924,14 @@ numval_c( cblc_field_t *dest,
 
       case after_digits  :
         // after digits, the only valid things are spaces, plus, minus, D, or C
-        if( ch != charmap->mapped_character(ascii_space) )
+        if( ch != ascii_space )
           {
-          if(       ch == mapped_plus
-                 || ch == mapped_minus
-                 || ch == mapped_D
-                 || ch == mapped_d
-                 || ch == mapped_C
-                 || ch == mapped_c )
+          if(       ch == ascii_plus
+                 || ch == ascii_minus
+                 || ch == ascii_D
+                 || ch == ascii_d
+                 || ch == ascii_C
+                 || ch == ascii_c )
             {
             state = second_sign;
             p -= 1;
@@ -2945,24 +2946,24 @@ numval_c( cblc_field_t *dest,
           errcode = p - pstart;
           p = pend;
           }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
           {
           sign = 1;
           }
-        else if( ch == mapped_minus )
+        else if( ch == ascii_minus )
           {
           sign = -1;
           }
-        else if(    (ch == mapped_D || ch == mapped_d)
+        else if(    (ch == ascii_D || ch == ascii_d)
                     && p < pend
-                    && (*p == mapped_B || *p == mapped_b) )
+                    && (*p == ascii_B || *p == ascii_b) )
           {
           sign = -1;
           p += 1;
           }
-        else if(    (ch == mapped_C || ch == mapped_c)
+        else if(    (ch == ascii_C || ch == ascii_c)
                     && p < pend
-                    && (*p == mapped_R || *p == mapped_r) )
+                    && (*p == ascii_R || *p == ascii_r) )
           {
           sign = -1;
           p += 1;
@@ -2972,7 +2973,7 @@ numval_c( cblc_field_t *dest,
 
       case final_space   :
         // There should be only spaces until the end
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           continue;
           }
@@ -3003,6 +3004,8 @@ numval_c( cblc_field_t *dest,
                             truncation_e,
                             NULL);
     }
+  free(currency_in_ascii);
+  free(pstart);
   return (int)errcode;
   }
 
@@ -3910,12 +3913,12 @@ __gg__upper_case( cblc_field_t *dest,
   __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                 length,
                                 from,
-                                DEFAULT_CHARMAP_SOURCE);
+                                DEFAULT_SOURCE_ENCODING);
   std::transform(dest->data, dest->data + dest_length, dest->data,
                  [](unsigned char c) { return std::toupper(c); });
   __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                 length,
-                                DEFAULT_CHARMAP_SOURCE,
+                                DEFAULT_SOURCE_ENCODING,
                                 to);
   }
 
@@ -3946,7 +3949,7 @@ __gg__when_compiled(cblc_field_t *dest, size_t tv_sec, long tv_nsec)
   char retval[DATE_STRING_BUFFER_SIZE];
   timespec_to_string(retval, tp);
   __gg__convert_encoding(PTRCAST(char, retval),
-                         DEFAULT_CHARMAP_SOURCE,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
   string_to_dest(dest, retval);
   }
@@ -3992,8 +3995,8 @@ gets_int( int ndigits,
   // position (starting at 1) where the problem is.
   int retval = 0;
 
-  int checked_0 = charmap->mapped_character(ascii_0);
-  int checked_9 = charmap->mapped_character(ascii_9);
+  unsigned int checked_0 = charmap->mapped_character(ascii_0);
+  unsigned int checked_9 = charmap->mapped_character(ascii_9);
 
   memset(digits, 0xFF, ndigits * sizeof(int));
   for(int i=1; i<=ndigits; i++)
@@ -4004,7 +4007,7 @@ gets_int( int ndigits,
       retval = -i;
       break;
       }
-    int ch = *p++;
+    unsigned int ch = (unsigned char)*p++;
     if( ch < checked_0 || ch > checked_9 )
       {
       // This isn't a digit zero through nine
@@ -4502,19 +4505,19 @@ gets_nanoseconds( const char *f,
   // positive return value.  A negative return value contains the number of
   // digits we processed,
 
-  int format_s = charmap_format->mapped_character(ascii_s);
-  int source_0 = charmap_source->mapped_character(ascii_0);
-  int source_9 = charmap_source->mapped_character(ascii_9);
+  unsigned int format_s = charmap_format->mapped_character(ascii_s);
+  unsigned int source_0 = charmap_source->mapped_character(ascii_0);
+  unsigned int source_9 = charmap_source->mapped_character(ascii_9);
 
   int errpos = 0;
   int ncount = 0;
   int nanoseconds = 0;
 
   const char *pinit = p;
-  while( f < f_end && *f == format_s && p < pend )
+  while( f < f_end && (unsigned char)*f == format_s && p < pend )
     {
     f += 1;
-    int ch = *p++;
+    unsigned int ch = (unsigned char)*p++;
     errpos += 1;
 
     if( ch < source_0 || ch > source_9 )
@@ -4560,6 +4563,10 @@ fill_cobol_tm(cobol_tm &ctm,
   charmap_t *charmap_format = __gg__get_charmap(par1->encoding);
   charmap_t *charmap_checked = __gg__get_charmap(par2->encoding);
   int checked_space = charmap_checked->mapped_character(ascii_space);
+  int source_plus  = charmap_checked->mapped_character(ascii_plus);
+  int source_minus = charmap_checked->mapped_character(ascii_minus);
+  int source_zero  = charmap_checked->mapped_character(ascii_zero);
+
   int format_space  = charmap_format->mapped_character(ascii_space);
   int format_T      = charmap_format->mapped_character(ascii_T      );
   int format_colon  = charmap_format->mapped_character(ascii_colon  );
@@ -4576,6 +4583,8 @@ fill_cobol_tm(cobol_tm &ctm,
   int format_M      = charmap_format->mapped_character(ascii_M      );
   int format_D      = charmap_format->mapped_character(ascii_D      );
   int format_zero   = charmap_format->mapped_character(ascii_zero   );
+  char decimal_point
+                = charmap_format->mapped_character(__gg__get_decimal_point());
 
   // Let's eliminate trailing spaces...
   trim_trailing_spaces(format, format_end, format_space);
@@ -4584,8 +4593,6 @@ fill_cobol_tm(cobol_tm &ctm,
   bool in_offset = false;
   bool in_nanoseconds = false;
 
-  char decimal_point = __gg__get_decimal_point();
-
   // We keep constant track of the current error location.
   int retval = 1;
   int errpos;
@@ -4596,7 +4603,7 @@ fill_cobol_tm(cobol_tm &ctm,
   int bump;
   while( format < format_end && source < source_end )
     {
-    char ch = *format;
+    unsigned char ch = *format;
 
     if(    ch == format_T
            || ch == format_colon
@@ -4605,7 +4612,7 @@ fill_cobol_tm(cobol_tm &ctm,
       {
       // These are just formatting characters.  They need to be duplicated,
       // but are otherwise ignored.
-      if( *source != ch )
+      if( (unsigned char)*source != ch )
         {
         break;
         }
@@ -4616,31 +4623,31 @@ fill_cobol_tm(cobol_tm &ctm,
     if( ch == format_plus )
       {
       // This flags a following hhmm offset.  It needs to match a '+' or '-'
-      if(    *source != format_plus
-          && *source != format_minus
-          && *source != format_zero)
+      if(    (unsigned char)*source != source_plus
+          && (unsigned char)*source != source_minus
+          && (unsigned char)*source != source_zero)
         {
         break;
         }
-      if( *source == format_zero )
+      if( (unsigned char)*source == format_zero )
         {
         // The next four characters have to be zeroes
-        if( source[1] != format_zero )
+        if( (unsigned char)source[1] != format_zero )
           {
           retval += 1;
           break;
           }
-        if( source[2] != format_zero )
+        if( (unsigned char)source[2] != format_zero )
           {
           retval += 2;
           break;
           }
-        if( source[3] != format_zero )
+        if( (unsigned char)source[3] != format_zero )
           {
           retval += 3;
           break;
           }
-        if( source[4] != format_zero )
+        if( (unsigned char)source[4] != format_zero )
           {
           retval += 4;
           break;
@@ -4691,7 +4698,7 @@ fill_cobol_tm(cobol_tm &ctm,
     if( ch == format_D )
       {
       // We have three possibilities: DDD, DD, and D
-      if( format[1] != format_D )
+      if( (unsigned char)format[1] != format_D )
         {
         // A singleton 'D' is a day-of-week
         errpos = gets_day_of_week(source, source_end, charmap_checked, ctm);
@@ -4702,7 +4709,7 @@ fill_cobol_tm(cobol_tm &ctm,
           }
         bump = 1;
         }
-      else if( format[2] != format_D )
+      else if( (unsigned char)format[2] != format_D )
         {
         // This is DD, for day-of-month
         errpos = gets_day(source, source_end, charmap_checked, ctm);
@@ -4798,7 +4805,8 @@ fill_cobol_tm(cobol_tm &ctm,
     if( ch == format_Z || ch == format_z )
       {
       // This has to be the end of the road
-      if( std::toupper((unsigned char)source[0]) != 'Z' )
+      if(    (unsigned char)source[0] != format_Z
+          && (unsigned char)source[0] != format_z )
         {
         retval += 0;
         break;
@@ -5040,19 +5048,9 @@ __gg__lowest_algebraic( cblc_field_t *dest,
 
 static int
 floating_format_tester( char const * const f,
-                        char const * const f_end,
-                        cbl_encoding_t encoding)
+                        char const * const f_end)
   {
-  charmap_t *charmap = __gg__get_charmap(encoding);
-  int mapped_space = charmap->mapped_character(ascii_space);
-  int mapped_plus  = charmap->mapped_character(ascii_plus);
-  int mapped_minus = charmap->mapped_character(ascii_minus);
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
-  int mapped_E = charmap->mapped_character(ascii_E);
-  int mapped_e = charmap->mapped_character(ascii_e);
-  int decimal_point = charmap->mapped_character(__gg__get_decimal_point());
-
+  // This routine operates in ASCII space
   int retval = -1;
 
   enum
@@ -5074,23 +5072,23 @@ floating_format_tester( char const * const f,
     switch(state)
       {
       case SPACE1:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           // Just keep looking
           break;
           }
-        if(    ch == mapped_minus
-            || ch == mapped_plus)
+        if(    ch == ascii_minus
+            || ch == ascii_plus)
           {
           state = SPACE2;
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           state = DIGITS1;
           break;
           }
-        if( decimal_point )
+        if( __gg__decimal_point )
           {
           state = DIGITS2;
           break;
@@ -5100,16 +5098,16 @@ floating_format_tester( char const * const f,
         break;
 
       case SPACE2:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           state = DIGITS1;
           break;
           }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
           {
           state = DIGITS2;
           break;
@@ -5118,16 +5116,16 @@ floating_format_tester( char const * const f,
         break;
 
       case DIGITS1:
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           break;
           }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
           {
           state = DIGITS2;
           break;
           }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           state = SPACE3;
           break;
@@ -5136,16 +5134,16 @@ floating_format_tester( char const * const f,
         break;
 
       case DIGITS2:
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           break;
           }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           state = SPACE3;
           break;
           }
-        if( ch == mapped_E || ch == mapped_e )
+        if( ch == ascii_E || ch == ascii_e )
           {
           state = SPACE4;
           break;
@@ -5154,16 +5152,16 @@ floating_format_tester( char const * const f,
         break;
 
       case SPACE3:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           retval = index;
           break;
           }
-        if( ch == mapped_E || ch == mapped_e )
+        if( ch == ascii_E || ch == ascii_e )
           {
           state = SPACE4;
           break;
@@ -5172,16 +5170,16 @@ floating_format_tester( char const * const f,
         break;
 
       case SPACE4:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
-        if( ch == mapped_minus || ch == mapped_plus )
+        if( ch == ascii_minus || ch == ascii_plus )
           {
           state = SPACE5;
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           state = DIGITS3;
           break;
@@ -5190,11 +5188,11 @@ floating_format_tester( char const * const f,
         break;
 
       case SPACE5:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           break;
           }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           state = DIGITS3;
           break;
@@ -5203,11 +5201,11 @@ floating_format_tester( char const * const f,
         break;
 
       case DIGITS3:
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
           {
           break;
           }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
           {
           state = SPACE6;
           break;
@@ -5216,7 +5214,7 @@ floating_format_tester( char const * const f,
         break;
 
       case SPACE6:
-      if( ch == mapped_space )
+      if( ch == ascii_space )
         {
         break;
         }
@@ -5242,16 +5240,19 @@ __gg__numval_f( cblc_field_t *dest,
                 size_t source_offset,
                 size_t source_size)
   {
+  // It's just easiest for this routine to operate in ASCII space:
+  size_t nbytes;
+  char *converted = __gg__iconverter(source->encoding,
+                                  DEFAULT_SOURCE_ENCODING,
+                                  PTRCAST(char, source->data + source_offset),
+                                  source_size,
+                                  &nbytes);
   GCOB_FP128 value = 0;
-  const char *data     = PTRCAST(char, (source->data + source_offset));
+  const char *data     = converted;
   const char *data_end = data + source_size;
-  charmap_t *charmap = __gg__get_charmap(source->encoding);
-  int mapped_space = charmap->mapped_character(ascii_space);
 
   int error = floating_format_tester( data,
-                                      data_end,
-                                      source->encoding);
-
+                                      data_end);
   if( error || source_size >= 256 )
     {
     exception_raise(ec_argument_function_e);
@@ -5264,12 +5265,13 @@ __gg__numval_f( cblc_field_t *dest,
     while( data < data_end )
       {
       char ch = *data++;
-      if( ch != mapped_space )
+      if( ch != ascii_space )
         {
         *p++ = ch;
         }
       }
     *p++ = '\0';
+    // This next call is why we needed to be in ASCII space.
     value = strtofp128(ach, NULL);
     }
   __gg__float128_to_field(dest,
@@ -5285,13 +5287,18 @@ __gg__test_numval_f(cblc_field_t *dest,
                     size_t source_offset,
                     size_t source_size)
   {
-  const char *data     = PTRCAST(char, (source->data + source_offset));
+  // It's just easiest for this routine to operate in ASCII space:
+  size_t nbytes;
+  char *converted = __gg__iconverter(source->encoding,
+                                  DEFAULT_SOURCE_ENCODING,
+                                  PTRCAST(char, source->data + source_offset),
+                                  source_size,
+                                  &nbytes);
+
+  const char *data     = converted;
   const char *data_end = data + source_size;
-
   int error = floating_format_tester( data,
-                                      data_end,
-                                      source->encoding);
-
+                                      data_end);
   __gg__int128_to_field(dest,
                         error,
                         NO_RDIGITS,
@@ -5314,14 +5321,52 @@ ismatch(const char *a1, const char *a2, const char *b1, const char *b2)
   }
 
 static bool
-iscasematch(const char *a1, const char *a2, const char *b1, const char *b2)
+iscasematch(const char *a1, const char *a2, 
+            const char *b1, const char *b2,
+            bool is_ebcdic)
   {
+  static const unsigned int ebcdic_lower[256] =
+    {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+    };
+
   bool retval = true;
-  while( a1 < a2 && b1 < b2 )
+
+  if( !is_ebcdic )
     {
-    if( std::tolower((unsigned char)*a1++) != std::tolower((unsigned char)*b1++) )
+    while( a1 < a2 && b1 < b2 )
       {
-      retval = false;
+      if( std::tolower((unsigned char)*a1++)
+              != std::tolower((unsigned char)*b1++) )
+        {
+        retval = false;
+        }
+      }
+    }
+  else
+    {
+    while( a1 < a2 && b1 < b2 )
+      {
+      if( ebcdic_lower[(unsigned int)(unsigned char)*a1++]
+                          != ebcdic_lower[(unsigned int)(unsigned char)*b1++] )
+        {
+        retval = false;
+        }
       }
     }
   return retval;
@@ -5353,13 +5398,14 @@ const char *
 strcasestr( const char *haystack,
             const char *haystack_e,
             const char *needle,
-            const char *needle_e)
+            const char *needle_e,
+            bool is_ebcdic)
   {
   const char *retval = NULL;
   const char *pend = haystack_e - (needle_e - needle);
   while( haystack <= pend )
     {
-    if(iscasematch(haystack, haystack_e, needle, needle_e))
+    if(iscasematch(haystack, haystack_e, needle, needle_e, is_ebcdic))
       {
       retval = haystack;
       break;
@@ -5394,13 +5440,14 @@ const char *
 strcaselaststr( const char *haystack,
                 const char *haystack_e,
                 const char *needle,
-                const char *needle_e)
+                const char *needle_e,
+                bool is_ebcdic)
   {
   const char *retval = NULL;
   const char *pend = haystack_e - (needle_e - needle);
   while( haystack <= pend )
     {
-    if(iscasematch(haystack, haystack_e, needle, needle_e))
+    if(iscasematch(haystack, haystack_e, needle, needle_e, is_ebcdic))
       {
       retval = haystack;
       }
@@ -5409,7 +5456,6 @@ strcaselaststr( const char *haystack,
   return retval;
   }
 
-
 extern "C"
 void
 __gg__substitute( cblc_field_t *dest,
@@ -5442,6 +5488,9 @@ __gg__substitute( cblc_field_t *dest,
   const char **pflasts = static_cast<const char **>(malloc(N * sizeof(char *)));
   massert(pflasts);
 
+  const charmap_t *charmap = __gg__get_charmap(arg1_f->encoding);
+  bool is_ebcdic = charmap->is_like_ebcdic();
+
   if( arg1_s == 0 )
     {
     exception_raise(ec_argument_function_e);
@@ -5462,14 +5511,16 @@ __gg__substitute( cblc_field_t *dest,
         pflasts[i] = strcasestr(haystack,
                                 haystack_e,
                                 PTRCAST(char, (arg2_f[i]->data+arg2_o[i])),
-                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i]);
+                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i],
+                                is_ebcdic);
         }
       else if( control[i] & substitute_last_e)
         {
         pflasts[i] = strcaselaststr(haystack,
                                 haystack_e,
                                 PTRCAST(char, (arg2_f[i]->data+arg2_o[i])),
-                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i]);
+                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i],
+                                is_ebcdic);
         }
       else
         {
@@ -5532,7 +5583,8 @@ __gg__substitute( cblc_field_t *dest,
                                                                  haystack,
                                                                  haystack_e,
                                                                  needle,
-                                                                 needle_e);
+                                                                 needle_e,
+                                                                 is_ebcdic);
         if( !matched )
           {
           matched = !(control[i] & substitute_anycase_e) && ismatch(haystack,
@@ -5622,11 +5674,11 @@ __gg__locale_compare( cblc_field_t *dest,
       }
     }
 
-  __gg__adjust_dest_size(dest, 1);
-  dest->data[0] = *achretval;
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(achretval,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
+  memcpy(dest->data, achretval, strlen(achretval));
+  __gg__adjust_dest_size(dest, strlen(achretval));
   }
 
 extern "C"
@@ -5659,12 +5711,11 @@ __gg__locale_date(cblc_field_t *dest,
     strcpy(ach, nl_langinfo(D_FMT));
     strftime(ach, sizeof(ach), nl_langinfo(D_FMT), &tm);
     }
-
-  __gg__adjust_dest_size(dest, strlen(ach));
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(ach,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_dest_size(dest, strlen(ach));
   }
 
 extern "C"
@@ -5698,11 +5749,11 @@ __gg__locale_time(cblc_field_t *dest,
     strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
     }
 
-  __gg__adjust_dest_size(dest, strlen(ach));
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(ach,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_dest_size(dest, strlen(ach));
   }
 
 extern "C"
@@ -5738,9 +5789,9 @@ __gg__locale_time_from_seconds( cblc_field_t *dest,
     strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
     }
 
-  __gg__adjust_dest_size(dest, strlen(ach));
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(ach,
+                         DEFAULT_SOURCE_ENCODING,
                          dest->encoding);
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_dest_size(dest, strlen(ach));
   }
index 89153bbcca2f26783feae976c3ff76e2e87fd4f1..f587fbfa06f9ba67a5f28b17b6093b7324295e0a 100644 (file)
@@ -289,6 +289,7 @@ class ec_status_t {
       case file_op_write: return "write";
       case file_op_rewrite: return "rewrite";
       case file_op_delete: return "delete";
+      case file_op_remove: return "remove";
       }
       return "???";
     }
@@ -1627,7 +1628,7 @@ int128_to_field(cblc_field_t   *var,
                                 var->picture);
               size_t outlength;
               const char *converted = __gg__iconverter(
-                                     DEFAULT_CHARMAP_SOURCE,
+                                     DEFAULT_SOURCE_ENCODING,
                                      var->encoding,
                                      PTRCAST(char, location),
                                      var->capacity,
@@ -2755,7 +2756,7 @@ __gg__dirty_to_float( const char *dirty,
   int delta_r = 0;
 
   // We now loop over the remaining input characters:
-  char ch = '\0';
+  unsigned char ch = '\0';
 
   charmap_t *charmap = __gg__get_charmap(field->encoding);
 
@@ -3055,7 +3056,7 @@ format_for_display_internal(char **dest,
         // This buffer is larger than can validly be needed
         unsigned char converted[128];
         size_t outlength;
-        retval = DEFAULT_CHARMAP_SOURCE;
+        retval = DEFAULT_SOURCE_ENCODING;
         const char *mapped = __gg__iconverter(
                                   var->encoding,
                                   retval,
@@ -3285,7 +3286,7 @@ format_for_display_internal(char **dest,
         }
 
       char ach[128];
-      retval = DEFAULT_CHARMAP_SOURCE;
+      retval = DEFAULT_SOURCE_ENCODING;
       charmap_t *charmap = __gg__get_charmap(retval);
 
       __gg__binary_to_string_ascii(ach, digits, value);
@@ -3724,7 +3725,13 @@ get_float128( const cblc_field_t *field,
     {
     if( __gg__decimal_point == '.' )
       {
-      retval = strtofp128(field->initial, NULL);
+      size_t charsout;
+      char *converted = __gg__iconverter(field->encoding,
+                                         DEFAULT_SOURCE_ENCODING,
+                                         field->initial,
+                                         strlen(field->initial),
+                                         &charsout);
+      retval = strtofp128(converted, NULL);
       }
     else
       {
@@ -3954,7 +3961,7 @@ compare_field_class(const cblc_field_t  *conditional,
         walker = right + right_len;
 
         GCOB_FP128 left_value;
-        if( left_flag == 'F' && left[0] == 'Z' )
+        if( left_flag == ascii_F && left[0] == ascii_Z )
           {
           left_value = 0;
           }
@@ -4375,9 +4382,11 @@ __gg__compare_2(cblc_field_t *left_side,
             }
           massert(buffer);
           strcpy(buffer, right_side->initial);
+
           if( __gg__decimal_point == ',' )
             {
-            // We need to replace any commas with periods
+            // We are operating in DECIMAL IS COMMA mode, so we need to
+            // replace any commas with periods.
             char *p = strchr(buffer, ',');
             if(p)
               {
@@ -4385,8 +4394,9 @@ __gg__compare_2(cblc_field_t *left_side,
               }
             }
 
-          // buffer[] now contains the string we want to convert
-
+          // buffer[] now contains the right-side string we want to convert
+          // to one of the floating-point types.  We want them to be the
+          // same size:
           switch(left_side->capacity)
             {
             case 4:
@@ -4970,7 +4980,7 @@ init_var_both(cblc_field_t  *var,
             {
             strcpy(first, walker);
             __gg__convert_encoding( first,
-                                    DEFAULT_CHARMAP_SOURCE,
+                                    DEFAULT_SOURCE_ENCODING,
                                     var->encoding);
             }
           walker += strlen(first) + 1;
@@ -4987,7 +4997,7 @@ init_var_both(cblc_field_t  *var,
           else
             {
            __gg__convert_encoding( last,
-                                   DEFAULT_CHARMAP_SOURCE,
+                                   DEFAULT_SOURCE_ENCODING,
                                    var->encoding);
             }
           walker += strlen(last) + 1;
@@ -6234,7 +6244,7 @@ __gg__move( cblc_field_t        *fdest,
             // ascii:
             size_t charsout;
             const char *converted = __gg__iconverter(fsource->encoding,
-                                                     DEFAULT_CHARMAP_SOURCE,
+                                                     DEFAULT_SOURCE_ENCODING,
                                     PTRCAST(char, fsource->data+source_offset),
                                                      source_size,
                                                      &charsout);
@@ -9142,11 +9152,17 @@ display_both(cblc_field_t  *field,
       }
     }
 
+  size_t conversion_length = strlen(display_string);
+  if( charmap->stride() != 1 )
+    {
+    conversion_length = qual_size;
+    }
+
   size_t outlength;
   const char *converted = __gg__iconverter( encoding,
                                             encout,
                                             display_string,
-                                            strlen(display_string),
+                                            conversion_length,
                                             &outlength);
   write(file_descriptor,
         converted,
@@ -10059,7 +10075,7 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
       digits_e -= 1;
       unsigned char final_char = (unsigned char)*digits_e;
       final_char = charmap->set_digit_negative(final_char, false);
-      if(   final_char<charmap->mapped_character(ascii_0) 
+      if(   final_char<charmap->mapped_character(ascii_0)
          || final_char>charmap->mapped_character(ascii_9) )
         {
         retval = 0;
@@ -10420,14 +10436,14 @@ accept_envar( cblc_field_t  *tgt,
     // Convert the name to the console codeset:
     __gg__convert_encoding( trimmed_env,
                             encoding,
-                            DEFAULT_CHARMAP_SOURCE);
+                            DEFAULT_SOURCE_ENCODING);
 
     // Pick up the environment variable, and convert it to the internal codeset
     const char *p = getenv(trimmed_env);
     if(p)
       {
       retval = 0; // Okay
-      move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_CHARMAP_SOURCE);
+      move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_SOURCE_ENCODING);
       }
     free(env);
     }
@@ -10638,7 +10654,7 @@ __gg__get_argv( cblc_field_t *dest,
                 dest_offset,
                 dest_length,
                 stashed_argv[N],
-                DEFAULT_CHARMAP_SOURCE);
+                DEFAULT_SOURCE_ENCODING);
     retcode = 0;  // Okay
     }
   return retcode;
@@ -11381,7 +11397,7 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
                     id5_o[nreceiver],
                     id5_s[nreceiver],
                     "",
-                    DEFAULT_CHARMAP_SOURCE);
+                    DEFAULT_SOURCE_ENCODING);
         }
       }
 
@@ -11768,6 +11784,7 @@ __gg__check_fatal_exception()
     case file_op_write:
     case file_op_rewrite:
     case file_op_delete:
+    case file_op_remove:
       break;
     }
   } else {
@@ -12039,6 +12056,23 @@ __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount)
     }
   }
 
+extern "C"
+void
+__gg__adjust_encoding(cblc_field_t *field)
+  {
+  // Assume that field->data is in ASCII;  We need to convert it to the target
+  size_t nbytes;
+  const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                           field->encoding,
+                                           PTRCAST(char, field->data),
+                                           field->capacity,
+                                           &nbytes);
+  size_t tocopy = std::min(nbytes, field->allocated);
+  field->capacity = tocopy;
+  memcpy(field->data, converted, tocopy);
+  }
+
+
 extern "C"
 void
 __gg__func_exception_location(cblc_field_t *dest)
@@ -12088,6 +12122,7 @@ __gg__func_exception_location(cblc_field_t *dest)
     }
   __gg__adjust_dest_size(dest, strlen(ach));
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
   }
 
 extern "C"
@@ -12102,6 +12137,7 @@ __gg__func_exception_statement(cblc_field_t *dest)
     }
   __gg__adjust_dest_size(dest, strlen(ach));
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
   }
 
 extern "C"
@@ -12128,6 +12164,7 @@ __gg__func_exception_status(cblc_field_t *dest)
     }
   __gg__adjust_dest_size(dest, strlen(ach));
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
   }
 
 extern "C"
@@ -12195,6 +12232,7 @@ __gg__func_exception_file(cblc_field_t      *dest,
 
   __gg__adjust_dest_size(dest, strlen(ach));
   memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
   }
 
 extern "C"
@@ -12693,7 +12731,7 @@ __gg__just_mangle_name( const cblc_field_t  *field,
   // We need ach_name to be in ASCII:
   size_t charsout;
   const char *converted = __gg__iconverter(field->encoding,
-                                           DEFAULT_CHARMAP_SOURCE,
+                                           DEFAULT_SOURCE_ENCODING,
                                            PTRCAST(char, field->data),
                                            length,
                                            &charsout);
@@ -12784,7 +12822,7 @@ __gg__function_handle_from_name(int                 program_id,
 
   size_t charsout;
   const char *converted = __gg__iconverter(field->encoding,
-                                           DEFAULT_CHARMAP_SOURCE,
+                                           DEFAULT_SOURCE_ENCODING,
                                            PTRCAST(char, field->data + offset),
                                            length,
                                            &charsout);
@@ -13093,6 +13131,7 @@ __gg__deallocate( cblc_field_t *target,
 static int
 get_the_byte(cblc_field_t *field)
   {
+  // This is a helper routine for ALLOCATE
   int retval = -1;
   if( field )
     {
@@ -13100,7 +13139,14 @@ get_the_byte(cblc_field_t *field)
     retval = __gg__fc_char(field);
     if(retval == -1)
       {
-      retval = (int)__gg__get_integer_binary_value(field);
+      retval = (int)(unsigned char)__gg__get_integer_binary_value(field);
+      }
+    else
+      {
+      // This is a bit of a hack.  It turns out the figurative constant is
+      // encoded in ASCII.  We need it to be in the current DISPLAY encoding.
+      charmap_t *charmap = __gg__get_charmap(__gg__display_encoding);
+      retval = charmap->mapped_character(retval);
       }
     }
   return retval;
@@ -13373,6 +13419,7 @@ __gg__module_name(cblc_field_t *dest, module_type_t type)
 
   __gg__adjust_dest_size(dest, strlen(result));
   memcpy(dest->data, result, strlen(result)+1);
+  __gg__adjust_encoding(dest);
   }
 
 /*
@@ -13652,7 +13699,7 @@ __gg__accept_arg_value( cblc_field_t *dest,
                 dest_offset,
                 dest_length,
                 stashed_argv[sv_argument_number],
-                DEFAULT_CHARMAP_SOURCE);
+                DEFAULT_SOURCE_ENCODING);
     retcode = 0;  // Okay
 
     // The Fujitsu spec says bump this value by one.
index 54b9f02ca37c9f35f8cc3b611486396d39dea6ff..d89d48022c076e641448cc8dd6001c142d34b80b 100644 (file)
@@ -408,6 +408,7 @@ static void fatalError(void * CTX, const char * msg, ...)
 }
 
 #if 0
+
 static xmlEntityPtr getEntity(void * CTX,
                               const xmlChar * name)
 { SAYSO_DATAZ(name); }
@@ -484,6 +485,7 @@ static void setDocumentLocator(void * CTX,
  * xmlCtxtGetStandalone() to get data from the XML declaration.
  */
 static void startDocument(void * CTX)
+
 {
   SAYSO();
 }
@@ -616,7 +618,6 @@ xmlchar_of( const char input[] ) {
 static const char *
 xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
   const char *msg = "???";
-
   switch( erc ) {
   case XML_ERR_OK:
     msg = "Success";
@@ -630,6 +631,7 @@ xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
   case XML_ERR_UNSUPPORTED_ENCODING:
     msg = "Unsupported character encoding";
     break;
+
 #if LIBXML_VERSION >= 21400
   case XML_ERR_RESOURCE_LIMIT:
     msg = "Internal resource limit like maximum amplification factor exceeded";
@@ -710,6 +712,7 @@ static class context_t {
     }
   }
 
+
  protected:
   void init() {
     const char *external_entities = nullptr;