cobol: Mainly extends compilation and execution in finternal-ebcdic.

author Robert Dubner <rdubner@symas.com>

Thu, 6 Nov 2025 12:26:18 +0000 (07:26 -0500)

committer Robert Dubner <rdubner@symas.com>

Thu, 6 Nov 2025 13:57:54 +0000 (08:57 -0500)
author Robert Dubner <rdubner@symas.com>
Thu, 6 Nov 2025 12:26:18 +0000 (07:26 -0500)
committer Robert Dubner <rdubner@symas.com>
Thu, 6 Nov 2025 13:57:54 +0000 (08:57 -0500)
diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in

index ed6b588fe729cab946e314e20ede5ab5dc5cf2fe..9f28752f165ee6cd9d8e01bef05162cac3b02c45 100644 (file)
--- a/gcc/cobol/Make-lang.in
+++ b/gcc/cobol/Make-lang.in
@@ -330,7 +330,7 @@ cobol.srcpdf: gcobol.pdf gcobol-io.pdf
         ln $^ $(srcdir)/cobol/
  
  gcobol.pdf: $(srcdir)/cobol/gcobol.1
-       groff -mdoc -T pdf  $^ > $@~
+       groff -mdoc -t -T pdf  $^ > $@~
         @mv $@~ $@
  gcobol-io.pdf: $(srcdir)/cobol/gcobol.3
         groff -mdoc -T pdf  $^ > $@~
diff --git a/gcc/cobol/cdf.y b/gcc/cobol/cdf.y

index f72ed77c964155f757181043a5d826b3f32bd550..2d3f8192bc61b6f2ce8be6047586a63628349c77 100644 (file)
--- a/gcc/cobol/cdf.y
+++ b/gcc/cobol/cdf.y
@@ -244,21 +244,21 @@ apply_cdf_turn( const exception_turn_t& turn ) {
  %type  <boolean>            DEFINED
  %token OTHER 699  PARAMETER_kw 369    "PARAMETER"
  %token OFF 688  OVERRIDE 370
-%token THRU 949
-%token TRUE_kw 814    "True"
+%token THRU 950
+%token TRUE_kw 815    "True"
  
  %token CALL_COBOL 393    "CALL"
  %token CALL_VERBATIM 394    "CALL (as C)"
  
-%token TURN 816  CHECKING 497  LOCATION 650  ON 690  WITH 843
+%token TURN 817  CHECKING 497  LOCATION 650  ON 690  WITH 844
  
-%left OR 950
-%left AND 951
-%right NOT 952
-%left '<'  '>'  '='  NE 953  LE 954  GE 955
+%left OR 951
+%left AND 952
+%right NOT 953
+%left '<'  '>'  '='  NE 954  LE 955  GE 956
  %left '-'  '+'
  %left '*'  '/'
-%right NEG 957
+%right NEG 958
  
  %define api.prefix {ydf}
  %define api.token.prefix{YDF_}
diff --git a/gcc/cobol/cobol1.cc b/gcc/cobol/cobol1.cc

index 3146da5789988b45694188c9218e4c208ac81772..77c457d496c66b83ec505d9206efde4c33d366e8 100644 (file)
--- a/gcc/cobol/cobol1.cc
+++ b/gcc/cobol/cobol1.cc
@@ -365,6 +365,7 @@ cobol_langhook_handle_option (size_t scode,
              return true;
  
          case OPT_fdefaultbyte:
+            // cobol_default_byte is an unsigned ing
              wsclear(cobol_default_byte);
              return true;
  
diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc

index 9d30dde96ebcc63406d79b9bbd40632bb5d34aa4..8c5f28ac07d86cd8b20f3b0783a8fbf8af3e290d 100644 (file)
--- a/gcc/cobol/genapi.cc
+++ b/gcc/cobol/genapi.cc
@@ -863,8 +863,12 @@ function_pointer_from_name(const cbl_refer_t &name,
                         NULL); // And, hence, no types
  
      // Fetch the FUNCTION_DECL for that FUNCTION_TYPE
-    tree function_decl = gg_build_fn_decl(name.field->data.initial,
+    char *tname = static_cast<char *>(xmalloc(name.field->data.capacity+1));
+    memcpy(tname, name.field->data.original(), name.field->data.capacity);
+    tname[name.field->data.capacity] = '\0';
+    tree function_decl = gg_build_fn_decl(tname,
                                            fndecl_type);
+    free(tname);
      // Take the address of the function decl:
      tree address_of_function = gg_get_address_of(function_decl);
      gg_assign(function_pointer, address_of_function);
@@ -877,11 +881,11 @@ function_pointer_from_name(const cbl_refer_t &name,
        gg_assign(function_pointer,
                  gg_cast(build_pointer_type(function_type),
                          gg_call_expr( VOID_P,
-                                  "__gg__function_handle_from_literal",
-                                  build_int_cst_type(INT,
-                                    current_function->our_symbol_table_index),
-                                  gg_string_literal(name.field->data.initial),
-                                  NULL_TREE)));
+                              "__gg__function_handle_from_literal",
+                              build_int_cst_type(INT,
+                                current_function->our_symbol_table_index),
+                              gg_string_literal(name.field->data.original()),
+                              NULL_TREE)));
        }
      else
        {
@@ -919,7 +923,7 @@ parser_initialize_programs( size_t nprogs,
        if( progs[i].field->type == FldLiteralA )
          {
          SHOW_PARSE_TEXT("\"")
-        SHOW_PARSE_TEXT(progs[i].field->data.initial)
+        SHOW_PARSE_TEXT(progs[i].field->data.original())
          SHOW_PARSE_TEXT("\"")
          }
        else
@@ -2246,21 +2250,19 @@ cobol_compare(  tree return_int,
                {
                // Comparing a FldLiteralN to an alphanumeric
  
-              // CONVERSION ALERT.  lefty->field->data.initial is an ASCII
-              // string.  We want to convert it to the same encoding as the
-              // right side.
-
-              cbl_encoding_t enc_left = DEFAULT_CHARMAP_SOURCE;
-              cbl_encoding_t enc_right =
-                  static_cast<cbl_encoding_t>(righty->field->codeset.encoding);
-
+              // This next conversion may be overkill.  But just in case
+              // the encodings of the two variables are different, we are
+              // going to convert left-side text to the right-side encoding
+              cbl_encoding_t enc_left  = lefty->field->codeset.encoding;
+              cbl_encoding_t enc_right = righty->field->codeset.encoding;
                size_t outlength;
-              char *converted = __gg__iconverter(enc_left,
-                                           enc_right,
-                                           lefty->field->data.initial,
-                                           strlen(lefty->field->data.initial)+1,
-                                           &outlength );
-
+              size_t inlength = strlen(lefty->field->data.initial);
+              char *converted = __gg__iconverter(
+                                         enc_left,
+                                         enc_right,
+                                         lefty->field->data.initial,
+                                         inlength,
+                                         &outlength );
                gg_assign(  return_int, gg_call_expr(
                            INT,
                            "__gg__literaln_alpha_compare",
@@ -2458,7 +2460,7 @@ move_tree(  cbl_field_t  *dest,
        gg_call(VOID,
                "__gg__string_to_alpha_edited",
                location,
-              build_int_cst_type(INT, DEFAULT_CHARMAP_SOURCE),
+              build_int_cst_type(INT, DEFAULT_SOURCE_ENCODING),
                psz_source,
                min_length,
                member(dest->var_decl_node, "picture"),
@@ -3956,7 +3958,7 @@ parser_enter_program( const char *funcname_,
  
    if( strcmp(funcname_, "main") == 0 && this_module_has_main )
      {
-    // setting 'retval' to 1 let's the caller know that we are being told
+    // Setting 'retval' to 1 lets the caller know that we are being told
      // both to synthesize a main() entry point to duplicate GCC's default
      // behavior, and to create an explicit entry point named "main".  This will
      // eventually result in a link error (because of the duplicated entry
@@ -4164,178 +4166,197 @@ parser_init_list()
    gg_call(VOID,
            "__gg__variables_to_init",
            gg_get_address_of(array),
-          wsclear() ? gg_string_literal(wsclear()) : null_pointer_node,
+          wsclear() ? build_string_literal(1, (const char *)wsclear())
+                    : null_pointer_node,
            NULL_TREE);
    }
  
-static void
-psa_FldLiteralN(struct cbl_field_t *field )
+static
+FIXED_WIDE_INT(128)
+dirty_to_binary(const char  *instring,
+                uint32_t    &capacity,
+                uint32_t    &digits,
+                int32_t     &rdigits,
+                uint64_t    &attr)
    {
-  Analyze();
-  SHOW_PARSE
-    {
-    SHOW_PARSE_HEADER
-    SHOW_PARSE_FIELD(" ", field)
-    SHOW_PARSE_END
-    }
-  // We are constructing a completely static constant structure, based on the
-  // text string in .initial
-
-  CHECK_FIELD(field);
+  digits = 0;
+  rdigits = 0;
+  attr = 0;
  
    FIXED_WIDE_INT(128) value = 0;
  
-  do
+  // We need to convert data.initial to an FIXED_WIDE_INT(128) value
+  const char *p = instring;
+  int sign = 1;
+  if( *p == '-' )
      {
-    // This is a false do{}while, to isolate the variables:
+    attr |= signable_e;
+    sign = -1;
+    p += 1;
+    }
+  else if( *p == '+' )
+    {
+    // We set it signable so that the instruction DISPLAY +1
+    // actually outputs "+1"
+    attr |= signable_e;
+    p += 1;
+    }
  
-    // We need to convert data.initial to an FIXED_WIDE_INT(128) value
-    char *p = const_cast<char *>(field->data.initial);
-    int sign = 1;
-    if( *p == '-' )
-      {
-      field->attr |= signable_e;
-      sign = -1;
-      p += 1;
-      }
-    else if( *p == '+' )
-      {
-      // We set it signable so that the instruction DISPLAY +1
-      // actually outputs "+1"
-      field->attr |= signable_e;
-      p += 1;
-      }
+  //  We need to be able to handle
+  //  123
+  //  123.456
+  //  123E<exp>
+  //  123.456E<exp>
+  //  where <exp> can be N, +N and -N
+  //
+  //  Oh, yeah, and we're talking handling up to 32 digits, or more, so using
+  //  library routines is off the table.
+
+  int rdigit_delta = 0;
+  int exponent = 0;
+  const char *exp = strchr(p, 'E');
+  if( !exp )
+    {
+    exp = strchr(p, 'e');
+    }
+  if(exp)
+    {
+    exponent = atoi(exp+1);
+    }
  
-    //  We need to be able to handle
-    //  123
-    //  123.456
-    //  123E<exp>
-    //  123.456E<exp>
-    //  where <exp> can be N, +N and -N
-    //
-    //  Oh, yeah, and we're talking handling up to 32 digits, or more, so using
-    //  library routines is off the table.
+  // We can now calculate the value, and the number of digits and rdigits.
  
-    int digits = 0;
-    int rdigits = 0;
-    int rdigit_delta = 0;
-    int exponent = 0;
+  // We count up leading zeroes as part of the attr->digits calculation.
+  // It turns out that certain comparisons need to know the number of digits,
+  // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true.  So,
+  // we need to count up leading zeroes.
  
-    const char *exp = strchr(p, 'E');
-    if( !exp )
+  for(;;)
+    {
+    char ch = *p++;
+    if( ch == symbol_decimal_point() )
        {
-      exp = strchr(p, 'e');
+      rdigit_delta = 1;
+      continue;
        }
-    if(exp)
+    if( ch < '0' || ch > '9' )
        {
-      exponent = atoi(exp+1);
+      break;
        }
+    digits += 1;
+    rdigits += rdigit_delta;
+    value *= 10;
+    value += ch - '0';
+    }
  
-    // We can now calculate the value, and the number of digits and rdigits.
-
-    // We count up leading zeroes as part of the attr->digits calculation.
-    // It turns out that certain comparisons need to know the number of digits,
-    // because "IF "2" EQUAL 002" is false, while "IF "2" EQUAL 2" is true.  So,
-    // we need to count up leading zeroes.
-
-    for(;;)
+  if( exponent < 0 )
+    {
+    rdigits += -exponent;
+    }
+  else
+    {
+    while(exponent--)
        {
-      char ch = *p++;
-      if( ch == symbol_decimal_point() )
+      if(rdigits)
          {
-        rdigit_delta = 1;
-        continue;
+        rdigits -= 1;
          }
-      if( ch < '0' || ch > '9' )
+      else
          {
-        break;
+        digits += 1;
+        value *= 10;
          }
-      digits += 1;
-      rdigits += rdigit_delta;
-      value *= 10;
-      value += ch - '0';
        }
+    }
  
-    if( exponent < 0 )
-      {
-      rdigits += -exponent;
-      }
-    else
-      {
-      while(exponent--)
-        {
-        if(rdigits)
-          {
-          rdigits -= 1;
-          }
-        else
-          {
-          digits += 1;
-          value *= 10;
-          }
-        }
-      }
+  if( (int32_t)digits < rdigits )
+    {
+    digits = rdigits;
+    }
  
-    if(digits < rdigits)
-      {
-      digits = rdigits;
-      }
-    field->data.digits = digits;
-    field->data.rdigits = rdigits;
+  // We now need to calculate the capacity.
  
-    // We now need to calculate the capacity.
+  unsigned int min_prec = wi::min_precision(value, UNSIGNED);
+  if( min_prec > 64 )
+    {
+    // Bytes 15 through 8 are non-zero
+    capacity = 16;
+    }
+  else if( min_prec > 32 )
+    {
+    // Bytes 7 through 4 are non-zero
+    capacity = 8;
+    }
+  else if( min_prec > 16 )
+    {
+    // Bytes 3 and 2
+    capacity = 4;
+    }
+  else if( min_prec > 8 )
+    {
+    // Byte 1 is non-zero
+    capacity = 2;
+    }
+  else
+    {
+    // The value is zero through 0xFF
+    capacity = 1;
+    }
  
-    unsigned int min_prec = wi::min_precision(value, UNSIGNED);
-    int capacity;
-    if( min_prec > 64 )
-      {
-      // Bytes 15 through 8 are non-zero
-      capacity = 16;
-      }
-    else if( min_prec > 32 )
-      {
-      // Bytes 7 through 4 are non-zero
-      capacity = 8;
-      }
-    else if( min_prec > 16 )
-      {
-      // Bytes 3 and 2
-      capacity = 4;
-      }
-    else if( min_prec > 8 )
+  value *= sign;
+
+  // One last adjustment.  The number is signable, so the binary value
+  // is going to be treated as twos complement.  That means that the highest
+  // bit has to be 1 for negative signable numbers, and 0 for positive.  If
+  // necessary, adjust capacity up by one byte so that the variable fits:
+
+  if( capacity < 16 && (attr & signable_e) )
+    {
+    FIXED_WIDE_INT(128) mask
+      = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1);
+    if( wi::neg_p (value) && (value & mask) == 0 )
        {
-      // Byte 1 is non-zero
-      capacity = 2;
+      capacity *= 2;
        }
-    else
+    else if( !wi::neg_p (value) && (value & mask) != 0 )
        {
-      // The value is zero through 0xFF
-      capacity = 1;
+      capacity *= 2;
        }
+    }
  
-    value *= sign;
+  return value;
+  }
  
-    // One last adjustment.  The number is signable, so the binary value
-    // is going to be treated as twos complement.  That means that the highest
-    // bit has to be 1 for negative signable numbers, and 0 for positive.  If
-    // necessary, adjust capacity up by one byte so that the variable fits:
+static void
+psa_FldLiteralN(struct cbl_field_t *field )
+  {
+  Analyze();
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_FIELD(" ", field)
+    SHOW_PARSE_END
+    }
+  // We are constructing a completely static constant structure, based on the
+  // text string in .initial
  
-    if( capacity < 16 && (field->attr & signable_e) )
-      {
-      FIXED_WIDE_INT(128) mask
-        = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1);
-      if( wi::neg_p (value) && (value & mask) == 0 )
-        {
-        capacity *= 2;
-        }
-      else if( !wi::neg_p (value) && (value & mask) != 0 )
-        {
-        capacity *= 2;
-        }
-      }
-    field->data.capacity = capacity;
+  CHECK_FIELD(field);
  
-    }while(0);
+  uint32_t capacity;
+  uint32_t digits;
+  int32_t  rdigits;
+  uint64_t attr;
+  FIXED_WIDE_INT(128) value = dirty_to_binary(field->data.original(),
+                                              capacity,
+                                              digits,
+                                              rdigits,
+                                              attr);
+  // This is a rare occurrence of a parser_xxx call changing the entry
+  // in the symbol table.
+  field->data.capacity = capacity;
+  field->data.digits   = digits;
+  field->data.rdigits  = rdigits;
+  field->attr         |= attr;
  
    char base_name[257];
    char id_string[32] = "";
@@ -5136,9 +5157,9 @@ parser_alphabet( const cbl_alphabet_t& alphabet )
          // character i has the ordinal alphabet[i]
          unsigned char ch = i;
  
-        ach[ch] = (alphabet.alphabet[i]);
+        ach[ch] = (alphabet.collation_sequence[i]);
          gg_assign(  gg_array_value(table256, ch),
-                    build_int_cst_type(UCHAR, (alphabet.alphabet[i])) );
+                    build_int_cst_type(UCHAR, (alphabet.collation_sequence[i])) );
          }
  
        unsigned int low_char  = alphabet.low_char;
@@ -6811,7 +6832,7 @@ parser_allocate(cbl_refer_t size_or_based,
    cbl_field_t *f_working = current_options().initial_working();
    cbl_field_t *f_local   = current_options().initial_local();
  
-  int default_byte = wsclear() ? *wsclear() : -1;
+  unsigned int default_byte = wsclear() ? *wsclear() : (uint32_t)(-1);
  
    gg_call(VOID,
            "__gg__allocate",
@@ -8201,7 +8222,7 @@ parser_label_label(struct cbl_label_t *label)
      }
  
    CHECK_LABEL(label);
-  
+
  #if 1
    // At the present time, label_verify.lay is returning true, so I edited
    // out the if( !... ) to quiet cppcheck
@@ -8252,7 +8273,7 @@ parser_label_goto(struct cbl_label_t *label)
      }
  
    CHECK_LABEL(label);
-  
+
    label_verify.go_to(label);
  
    label_verify.go_to(label);
@@ -9933,6 +9954,44 @@ parser_file_open( size_t nfiles, struct cbl_file_t *files[], int mode_char )
      }
    }
  
+static
+tree get_the_filename(bool &quoted_name, const cbl_file_t *file)
+  {
+  // The cbl_file_t has a cbl_field_t *filename.  This can be a FldAlphanumeric.
+  // The runtime has a (char *)filename, so we need to
+  // do a runtime conversion.
+
+  tree psz;   // This is going to be either the name of the file, or the
+  // possible run-time environment variable that will contain
+  // the name of the file.
+
+  cbl_field_t *field_of_name = symbol_field_forward(file->filename);
+  quoted_name = false;
+  if( field_of_name->type == FldForward )
+    {
+    // The target of ASSIGN TO was unquoted, but didn't resolve to a
+    // cbl_field_t.  This means that the name of the field is an
+    // environment variable that will hold the file name
+    psz = gg_define_char_star();
+    gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name)));
+    }
+  else
+    {
+    // The name is coming from a presumably FldAlphaNumeric variable
+    psz = get_string_from(field_of_name);
+    gg_call( CHAR_P,
+             "__gg__convert_encoding",
+             psz,
+             build_int_cst_type(INT,
+                                field_of_name->codeset.encoding),
+             build_int_cst_type(INT,
+                                DEFAULT_SOURCE_ENCODING),
+             NULL_TREE);
+    quoted_name = true;
+    }
+  return psz;
+  }
+
  void
  parser_file_open( struct cbl_file_t *file, int mode_char )
    {
@@ -9985,45 +10044,15 @@ parser_file_open( struct cbl_file_t *file, int mode_char )
      TRACE1_END
      }
  
-  // The cbl_file_t has a cbl_field_t *filename.  This can be a FldAlphanumeric.
-  // The runtime has a (char *)filename, so we need to
-  // do a runtime conversion.
-
-  tree psz;   // This is going to be either the name of the file, or the
-  // possible run-time environment variable that will contain
-  // the name of the file.
-
-  cbl_field_t *field_of_name = symbol_field_forward(file->filename);
-  bool quoted_name = false;
-  if( field_of_name->type == FldForward )
-    {
-    // The target of ASSIGN TO was unquoted, but didn't resolve to a
-    // cbl_field_t.  This means that the name of the field is an
-    // environment variable that will hold the file name
-    psz = gg_define_char_star();
-    gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name)));
-    }
-  else
-    {
-    // The name is coming from a presumably FldAlphaNumeric variable
-    psz = get_string_from(field_of_name);
-    gg_call( CHAR_P,
-             "__gg__convert_encoding",
-             psz,
-             build_int_cst_type(INT,
-                                field_of_name->codeset.encoding),
-             build_int_cst_type(INT,
-                                DEFAULT_CHARMAP_SOURCE),
-             NULL_TREE);
-    quoted_name = true;
-    }
+  bool quoted_name;
+  tree pszFilename = get_the_filename(quoted_name, file);
  
    sv_is_i_o = true;
    store_location_stuff("OPEN");
    gg_call(VOID,
            "__gg__file_open",
            gg_get_address_of(file->var_decl_node),
-          psz,
+          pszFilename,
            build_int_cst_type(INT, mode_char),
            quoted_name ? integer_one_node : integer_zero_node,
            NULL_TREE);
@@ -10384,6 +10413,121 @@ parser_file_delete( struct cbl_file_t *file, bool /*sequentially*/ )
      }
    }
  
+static void
+set_up_delete_file_label(cbl_label_t *delete_file_label)
+  {
+  if( delete_file_label )
+    {
+    if( !delete_file_label->structs.delete_file )
+      {
+      delete_file_label->structs.delete_file
+        = static_cast<cbl_delete_file_t *>
+                                  (xmalloc(sizeof(struct cbl_delete_file_t)));
+      // Set up the address pairs for this clause
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->over.go_to,
+                  &delete_file_label->structs.delete_file->over.label);
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->exception.go_to,
+                  &delete_file_label->structs.delete_file->exception.label);
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->no_exception.go_to,
+                  &delete_file_label->structs.delete_file->no_exception.label);
+      gg_create_goto_pair(
+                  &delete_file_label->structs.delete_file->bottom.go_to,
+                  &delete_file_label->structs.delete_file->bottom.label);
+      }
+    }
+  }
+
+void
+parser_file_delete_file( cbl_label_t *name,
+                         std::vector<cbl_file_t*> filenames )
+  {
+  // This removes a file from the file system.  It is distinct from the
+  // FILE DELETE statement, which deletes a record from a file.
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    for(size_t i=0; i<filenames.size(); i++)
+      {
+      SHOW_PARSE_INDENT
+      SHOW_PARSE_TEXT(filenames[i]->name)
+      }
+    SHOW_PARSE_END
+    }
+  set_up_delete_file_label(name);
+  tree there_was_an_error = gg_define_int(0);
+  for(size_t i=0; i<filenames.size(); i++)
+    {
+    bool quoted_name;
+    tree pszFilename = get_the_filename(quoted_name, filenames[i]);
+    gg_assign(there_was_an_error,
+              gg_bitwise_or(there_was_an_error,
+                            gg_call_expr(
+                            INT,
+                            "__gg__file_remove",
+                            gg_get_address_of(filenames[i]->var_decl_node),
+                            pszFilename,
+                            quoted_name ? integer_one_node : integer_zero_node,
+                            NULL_TREE)));
+    set_user_status(filenames[i]);
+    }
+  IF( there_was_an_error, eq_op, integer_zero_node )
+    {
+    // There was no error detected.
+    gg_append_statement(name->structs.delete_file->no_exception.go_to);
+    }
+  ELSE
+    {
+    // There was an error detected.
+    gg_append_statement(name->structs.delete_file->exception.go_to);
+    }
+  }
+
+void
+parser_file_delete_on_exception( cbl_label_t *name )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    SHOW_PARSE_END
+    }
+  gg_append_statement(name->structs.delete_file->bottom.go_to);
+  gg_append_statement(name->structs.delete_file->exception.label);
+  }
+
+void
+parser_file_delete_not_exception( cbl_label_t *name )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    SHOW_PARSE_END
+    }
+  gg_append_statement(name->structs.delete_file->bottom.go_to);
+  gg_append_statement(name->structs.delete_file->no_exception.label);
+  }
+
+void
+parser_file_delete_end( cbl_label_t *name )
+  {
+  SHOW_PARSE
+    {
+    SHOW_PARSE_HEADER
+    SHOW_PARSE_TEXT(" ");
+    SHOW_PARSE_TEXT(name->name);
+    SHOW_PARSE_END
+    }
+  gg_append_statement(name->structs.delete_file->bottom.label);
+  }
+
  void
  parser_file_rewrite(cbl_file_t *file,
                      cbl_field_t *record_area,
@@ -13639,7 +13783,7 @@ parser_call(   cbl_refer_t name,
      create_and_call(narg,
                      args,
                      NULL_TREE,
-                    name.field->data.initial,
+                    name.field->data.original(),
                      returned_value_type,
                      returned,
                      not_except);
@@ -13747,7 +13891,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args )
      {
      SHOW_PARSE_HEADER
      SHOW_PARSE_TEXT(" ")
-    SHOW_PARSE_TEXT(name->data.initial)
+    SHOW_PARSE_TEXT(name->data.original())
      SHOW_PARSE_END
      }
  
@@ -13756,7 +13900,7 @@ parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args )
  
    // Get the name of the ENTRY point.
    // cppcheck-suppress nullPointerRedundantCheck
-  char *psz = cobol_name_mangler(name->data.initial);
+  char *psz = cobol_name_mangler(name->data.original());
  
    // Create a goto/label pair.  The label will be set up here; the goto will
    // be used when we re-enter the containing function:
@@ -14642,13 +14786,12 @@ mh_source_is_literalN(cbl_refer_t &destref,
            SHOW_PARSE_TEXT("mh_source_is_literalN: __gg__psz_to_alpha_move")
            }
  
-        // We know that the encoding of the literal::initial is in ASCII
-
          // We need the data sent to __gg__psz_to_alpha_move to be in the
          // encoding of the destination
  
          size_t charsout;
-        const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
+        const char *converted = __gg__iconverter(
+                                         sourceref.field->codeset.encoding,
                                           destref.field->codeset.encoding,
                                           sourceref.field->data.initial,
                                           strlen(sourceref.field->data.initial),
@@ -16086,54 +16229,50 @@ real_powi10 (uint32_t x)
    return pow10;
  }
  
+static
  char *
-binary_initial_from_float128(cbl_field_t *field, int rdigits,
-                             REAL_VALUE_TYPE value)
+binary_initial(cbl_field_t *field)
    {
    // This routine returns an xmalloced buffer designed to replace the
    // data.initial member of the incoming field
    char *retval = NULL;
  
-  // We need to adjust value so that it has no decimal places
-  if( rdigits )
+  uint32_t capacity;
+  uint32_t ddigits;
+  int32_t  drdigits;
+  uint64_t attr;
+  FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
+                                                 capacity,
+                                                 ddigits,
+                                                 drdigits,
+                                                 attr);
+  int scaled_rdigits = get_scaled_rdigits(field);
+
+  int i = field->data.rdigits;
+  while( i<0 )
      {
-      REAL_VALUE_TYPE pow10 = real_powi10 (rdigits);
-      real_arithmetic (&value, MULT_EXPR, &value, &pow10);
-      real_convert (&value, TYPE_MODE (float128_type_node), &value);
+    value128 = value128/10;
+    i += 1;
      }
-  // We need to make sure that the resulting string will fit into
-  // a number with 'digits' digits
  
-  // Keep in mind that pure binary types, like BINARY-CHAR, have no digits
-  if( field->data.digits )
+  // We take the digits of value128, and put them into ach.  We line up
+  // the rdigits, and we truncate the string after desired_digits
+  while(drdigits < scaled_rdigits)
      {
-      REAL_VALUE_TYPE pow10 = real_powi10 (field->data.digits);
-      mpfr_t m0, m1;
-
-      mpfr_inits2 (REAL_MODE_FORMAT (TYPE_MODE (float128_type_node))->p,
-                   m0, m1, NULL);
-      mpfr_from_real (m0, &value, MPFR_RNDN);
-      mpfr_from_real (m1, &pow10, MPFR_RNDN);
-      mpfr_clear_flags ();
-      mpfr_fmod (m0, m0, m1, MPFR_RNDN);
-      real_from_mpfr (&value, m0,
-                      REAL_MODE_FORMAT (TYPE_MODE (float128_type_node)),
-                      MPFR_RNDN);
-      real_convert (&value, TYPE_MODE (float128_type_node), &value);
-      mpfr_clears (m0, m1, NULL);
+    value128 *= 10;
+    drdigits += 1;
+    }
+  while(drdigits > scaled_rdigits)
+    {
+    value128 = value128 / 10;
+    drdigits -= 1;
      }
-
-  real_roundeven (&value, TYPE_MODE (float128_type_node), &value);
-
-  bool fail = false;
-  FIXED_WIDE_INT(128) i
-    = FIXED_WIDE_INT(128)::from (real_to_integer (&value, &fail, 128), SIGNED);
  
    retval = static_cast<char *>(xmalloc(field->data.capacity));
    gcc_assert(retval);
    switch(field->data.capacity)
      {
-      tree type;
+    tree type;
      case 1:
      case 2:
      case 4:
@@ -16141,12 +16280,12 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits,
      case 16:
        type = build_nonstandard_integer_type ( field->data.capacity
                                                * BITS_PER_UNIT, 0);
-      native_encode_wide_int (type, i, PTRCAST(unsigned char, retval),
+      native_encode_wide_int (type, value128, PTRCAST(unsigned char, retval),
                                field->data.capacity);
        break;
      default:
        fprintf(stderr,
-              "Trouble in binary_initial_from_float128 at %s() %s:%d\n",
+              "Trouble in binary_initial at %s() %s:%d\n",
                __func__,
                __FILE__,
                __LINE__);
@@ -16157,6 +16296,60 @@ binary_initial_from_float128(cbl_field_t *field, int rdigits,
    return retval;
    }
  
+static void
+digits_from_int128( char                *ach,
+                    cbl_field_t         *field,
+                    uint32_t             desired_digits,
+                    FIXED_WIDE_INT(128)  value128, // cppcheck-suppress unknownMacro
+                    int32_t              rdigits)
+  {
+  if( value128 < 0 )
+    {
+    value128 = -value128;
+    }
+
+  // 'rdigits' are the number of rdigits in value128.
+
+  int scaled_rdigits = get_scaled_rdigits(field);
+
+  int i = field->data.rdigits;
+  while( i<0 )
+    {
+    value128 = value128/10;
+    i += 1;
+    }
+
+  // We take the digits of value128, and put them into ach.  We line up
+  // the rdigits, and we truncate the string after desired_digits
+  while(rdigits < scaled_rdigits)
+    {
+    value128 *= 10;
+    rdigits += 1;
+    }
+  while(rdigits > scaled_rdigits)
+    {
+    value128 = value128 / 10;
+    rdigits -= 1;
+    }
+  char conv[128];
+  print_dec (value128, conv, SIGNED);
+  size_t len = strlen(conv);
+
+  if( len<desired_digits )
+    {
+    memset(ach, ascii_0, desired_digits - len);
+    strcpy(ach+desired_digits - len, conv);
+    }
+  else
+    {
+    strcpy(ach, conv + len-desired_digits);
+    }
+  }
+
+#if 0
+// This routine was replaced with digits_from_int1289.  However, I am choosing
+// to keep it around for a while, because it is a master class in manipulating
+// REAL_VALUE_TYPE and FIXED_WIDE_INT
  
  static void
  digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits, REAL_VALUE_TYPE value)
@@ -16194,8 +16387,6 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits
    // We convert it to a integer string of digits:
    print_dec (i, ach, SIGNED);
  
-  //fprintf(stderr, "digits_from_float128() %s %f %s ", field->name, (double)value, ach);
-
    gcc_assert( strlen(ach) <= field->data.digits );
    if( strlen(ach) < width )
      {
@@ -16203,6 +16394,7 @@ digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits
      }
    strcpy(retval + (width-strlen(ach)), ach);
    }
+#endif
  
  static char *
  initial_from_initial(cbl_field_t *field)
@@ -16211,10 +16403,9 @@ initial_from_initial(cbl_field_t *field)
    // This routine returns an xmalloced buffer that is intended to replace the
    // data.initial member of the incoming field.
  
-  //fprintf(stderr, "initial_from_initial %s\n", field->name);
+  //fprintf(stderr, " %s\n", field->name);
  
    char *retval = NULL;
-  int rdigits;
  
    // Let's handle the possibility of a figurative constant
    cbl_figconst_t figconst = cbl_figconst_of(field->data.initial);
@@ -16253,6 +16444,8 @@ initial_from_initial(cbl_field_t *field)
    if( field->data.etc_type == cbl_field_data_t::value_e )
      value = TREE_REAL_CST (field->data.value_of ());
  
+#if 0
+  int rdigits;
    // There is always the infuriating possibility of a P-scaled number
    if( field->attr & scaled_e )
      {
@@ -16288,17 +16481,18 @@ initial_from_initial(cbl_field_t *field)
      // Not P-scaled
      rdigits = field->data.rdigits;
      }
+#endif
  
    switch(field->type)
      {
      case FldNumericBin5:
      case FldIndex:
-      retval = binary_initial_from_float128(field, rdigits, value);
+      retval = binary_initial(field);
        break;
  
      case FldNumericBinary:
        {
-      retval = binary_initial_from_float128(field, rdigits, value);
+      retval = binary_initial(field);
        size_t left = 0;
        size_t right = field->data.capacity - 1;
        while(left < right)
@@ -16328,7 +16522,17 @@ initial_from_initial(cbl_field_t *field)
          negative = false;
          }
  
-      digits_from_float128(ach, field, field->data.digits, rdigits, value);
+      // Convert the data.initial to a __int128
+      uint32_t capacity;
+      uint32_t ddigits;
+      int32_t  drdigits;
+      uint64_t attr;
+      FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
+                                                     capacity,
+                                                     ddigits,
+                                                     drdigits,
+                                                     attr);
+      digits_from_int128(ach, field, field->data.digits, value128, drdigits);
  
        const char *digits = ach;
        if(    (field->attr & signable_e)
@@ -16404,7 +16608,16 @@ initial_from_initial(cbl_field_t *field)
        size_t ndigits =   (field->attr & separate_e)
                         ? field->data.capacity * 2
                         : field->data.capacity * 2 - 1;
-      digits_from_float128(ach, field, ndigits, rdigits, value);
+      uint32_t capacity;
+      uint32_t ddigits;
+      int32_t  drdigits;
+      uint64_t attr;
+      FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
+                                                     capacity,
+                                                     ddigits,
+                                                     drdigits,
+                                                     attr);
+      digits_from_int128(ach, field, ndigits, value128, drdigits);
  
        const char *digits = ach;
        for(size_t i=0; i<ndigits; i++)
@@ -16517,13 +16730,31 @@ initial_from_initial(cbl_field_t *field)
          else
            {
            size_t ndigits = field->data.capacity;
-          digits_from_float128(ach, field, ndigits, rdigits, value);
-          /* ???  This resides in libgcobol valconv.cc.  */
+          uint32_t capacity;
+          uint32_t ddigits;
+          int32_t  drdigits;
+          uint64_t attr;
+          FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
+                                                         capacity,
+                                                         ddigits,
+                                                         drdigits,
+                                                         attr);
+          digits_from_int128(ach, field, ndigits, value128, drdigits);
+
+          // __gg__string_to_numeric_edited operates in ASCII space:
            __gg__string_to_numeric_edited( retval,
                                            ach,
                                            field->data.rdigits,
                                            negative,
                                            field->data.picture);
+          // So now we convert it to the target encoding:
+          size_t nbytes;
+          const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                                   field->codeset.encoding,
+                                                   retval,
+                                                   strlen(retval),
+                                                   &nbytes);
+          strcpy(retval, converted);
            }
          }
        break;
@@ -16556,10 +16787,32 @@ initial_from_initial(cbl_field_t *field)
  
      case FldLiteralN:
        {
-////      retval = static_cast<char *>(xmalloc(field->data.capacity+1));
-////      gcc_assert(retval);
-////      memcpy(retval, field->data.initial, field->data.capacity);
-////      retval[field->data.capacity] = '\0';
+      // This requires annotation.
+
+      // The compiler originally used ASCII for field->data.initial. Later we
+      // expanded the field with the addition of the codeset.encoding
+      // For consistency in the parser processing, the FldLiteralN is arriving
+      // with the Object-Computer's character encoding, and field->data.initial
+      // is showing up encoded.
+
+      // But on the run-time side, if the initial string is needed, it is
+      // invariably more useful in ASCII.  Consider converting that string to
+      // a floating-point value, for example.
+
+      // So, we are going to convert the data.initial string back to ASCII
+      // here.  Later on, when we establish the run-time encoding, we will
+      // check for FldLiteralN and set that to ASCII as well. See
+      // actually_create_the_static_field().
+
+      size_t nbytes;
+      const char *converted = __gg__iconverter(field->codeset.encoding,
+                                               DEFAULT_SOURCE_ENCODING,
+                                               field->data.initial,
+                                               strlen(field->data.initial),
+                                               &nbytes);
+      retval = static_cast<char *>(xmalloc(strlen(field->data.initial)+1));
+      gcc_assert(retval);
+      strcpy(retval, converted);
        break;
        }
  
@@ -16716,9 +16969,14 @@ actually_create_the_static_field( cbl_field_t *new_var,
    next_field = TREE_CHAIN(next_field);
  
    //  INT,     "encoding",
+  //  For FldLiteralN we force the encoding to be ASCII.
+  //  See initial_from_initial() for an explanation.
    CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr),
                          next_field,
-                        build_int_cst_type(INT, new_var->codeset.encoding));
+                        build_int_cst_type(INT,
+                                           new_var->type == FldLiteralN ?
+                                                DEFAULT_SOURCE_ENCODING
+                                              : new_var->codeset.encoding));
    next_field = TREE_CHAIN(next_field);
  
    //  INT,     "alphabet",
@@ -17643,6 +17901,10 @@ parser_symbol_add(struct cbl_field_t *new_var )
            length_of_initial_string = new_var->data.capacity+1;
            break;
  
+        case FldLiteralN:
+          length_of_initial_string = strlen(new_initial)+1;
+          break;
+
          default:
            length_of_initial_string = new_var->data.capacity;
            break;
diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h

index 6582d2e8898d6ac388233393b269e63568e9cb26..802bba7594468ac18606ea0b4307d67dae30d4b3 100644 (file)
--- a/gcc/cobol/genapi.h
+++ b/gcc/cobol/genapi.h
@@ -400,6 +400,12 @@ parser_file_rewrite( cbl_file_t *file, cbl_field_t *field,
  void
  parser_file_delete( cbl_file_t *file, bool sequentially );
  
+void parser_file_delete_file( cbl_label_t *name,
+                              std::vector<cbl_file_t*> filenames );
+void parser_file_delete_on_exception( cbl_label_t *name );
+void parser_file_delete_not_exception( cbl_label_t *name );
+void parser_file_delete_end( cbl_label_t *name );
+
  #if condition_lists
  struct cbl_conditional_t {
    cbl_field_t *tgt;
diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc

index 320e6bf43664fa0bc0ca2590e973c5631db69c77..7d6ae8c93d48538d5b262c90949b6473bba659f7 100644 (file)
--- a/gcc/cobol/genmath.cc
+++ b/gcc/cobol/genmath.cc
@@ -42,6 +42,7 @@
  #include "gengen.h"
  #include "structs.h"
  #include "../../libgcobol/gcobolio.h"
+#include "../../libgcobol/charmaps.h"
  #include "show_parse.h"
  
  void
diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc

index 56b6b83728b55c3c5e5ed5186a6deaf74dd0ca06..63f37f68806b6f600f4889200f3f65e82c7fb25b 100644 (file)
--- a/gcc/cobol/genutil.cc
+++ b/gcc/cobol/genutil.cc
@@ -1744,7 +1744,7 @@ get_literal_string(cbl_field_t *field)
    char *buffer = static_cast<char *>(xcalloc(1, buffer_length));
  
    size_t charsout;
-  const char *converted = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
+  const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
                                       field->codeset.encoding,
                                       field->data.initial,
                                       field->data.capacity,
diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y

index 9187a59a3cfcd97b70b7e5e12f9145d657ea27fb..d54a686511f5bef5e1eac22a98aea8ef45573d9c 100644 (file)
--- a/gcc/cobol/parse.y
+++ b/gcc/cobol/parse.y
@@ -51,7 +51,7 @@
      accept_envar_e,
    };
  
-  struct collating_an_t {
+  struct coll_alphanat_t {
      const char *alpha, *national; 
    };
  
@@ -575,7 +575,7 @@ class locale_tgt_t {
                         RD RECORD RECORDING RECORDS RECURSIVE
                         REDEFINES REEL REFERENCE RELATIVE REM REMAINDER REMARKS
                         REMOVAL RENAMES  REPLACE REPLACING REPORT REPORTING REPORTS
-                       REPOSITORY RERUN RESERVE RESTRICTED RESUME
+                       REPOSITORY RERUN RESERVE RESTRICTED RESUME RETRY
                         REVERSE REVERSED REWIND RF RH RIGHT ROUNDED RUN
  
                         SAME SCREEN SD
@@ -702,8 +702,8 @@ class locale_tgt_t {
  %type   <number>        open_io alphabet_etc
  %type   <special_type>  device_name
  %type   <string>        numed  context_word ctx_name locale_spec
-%type   <collating_sequences> collating_sequences collating_ans 
-%type   <collating_name> collating_an 
+%type   <char_class_locales> char_class_locales coll_alphanats 
+%type   <collating_name> coll_alphanat 
  %type   <literal>       namestr alphabet_lit program_as repo_as
  %type   <field>         perform_cond kind_of_name
  %type   <refer>         alloc_ret
@@ -738,6 +738,9 @@ class locale_tgt_t {
                          relative_key_clause reserve_clause sharing_clause
  
  %type   <file>          filename read_body write_body delete_body
+%type   <label>         delete_file_body
+%type   <error>         delete_error delete_except delete_excepts
+                                                
  %type   <file>         start_impl start_cond start_body
  %type   <rewrite_t>     rewrite_body
  %type   <min_max>       record_vary rec_contains from_to record_desc
@@ -833,6 +836,7 @@ class locale_tgt_t {
                          global is_global anycase backward
                          end_display
                          exh_changed exh_named
+                        override
  %type   <number>        mistake globally first_last
  %type   <io_mode>   io_mode
  
@@ -874,6 +878,7 @@ class locale_tgt_t {
  %type  <opt_init_sect>         opt_init_sect
  %type  <number>        opt_init_value
  %type  <number>        locale_current loc_category user_default
+%type   <string>        locale_name
  %type   <token_list>    loc_categories locale_tgt
  %type  <opt_round>     rounded round_between rounded_type rounded_mode
  %type  <opt_arith>     opt_arith_type
@@ -901,7 +906,7 @@ class locale_tgt_t {
      struct { YYLTYPE loc; int token; literal_t name; } prog_end;
      struct { int token; special_name_t id; } special_type;
      struct { char locale_type; const char * name; } locale_phrase;
-             collating_an_t collating_sequences;
+             coll_alphanat_t char_class_locales;
      struct collating_name_t { int token; const char *name; } collating_name;
      struct { size_t isym; cbl_encoding_t encoding; } codeset;
      struct { cbl_field_type_t type;
@@ -2371,6 +2376,23 @@ config_paragraphs: config_paragraph
  config_paragraph:
                  SPECIAL_NAMES '.'
          |       SPECIAL_NAMES '.' special_names '.'
+                {
+                  std::reverse_iterator<symbol_elem_t *>
+                                        p(symbols_end()),
+                                     pend(symbols_begin(PROGRAM));
+                  for( ++p; p != pend; p++ ) {
+                    if( p->type == SymAlphabet ) {
+                      const auto& alphabet = *cbl_alphabet_of(&*p);
+                      if( alphabet.encoding == no_encoding_e ) {
+                        assert(alphabet.locale != 0 );
+                        const auto& missing = *cbl_locale_of(symbol_at(alphabet.locale));
+                        error_msg(alphabet.loc,
+                          "ALPHABET %qs references LOCALE %qs, which is not defined",
+                                  alphabet.name, missing.name);
+                      }
+                    }
+                  }
+                }
          |       SOURCE_COMPUTER  '.' 
          |       SOURCE_COMPUTER  '.' NAME '.'
          |       SOURCE_COMPUTER  '.' NAME with_debug '.'
@@ -2507,19 +2529,36 @@ with_debug:     with DEBUGGING MODE {
                  ;
  
  collations:     %empty
-        |       collation_classification
-        |       collation_sequence
-        |       collation_classification collation_sequence
-        |       collation_sequence collation_classification
+        |       char_classification
+        |       collating_sequence
+        |       char_classification collating_sequence
+        |       collating_sequence char_classification
                  ;
-collation_classification:
-                character CLASSIFICATION collating_sequences[seq]
+char_classification:
+                character CLASSIFICATION char_class_locales[seq]
                  {
-                  warn_msg(@seq, "CHARACTER CLASSIFICATION ignored");
+                  if( $seq.alpha ) {
+                    auto e = symbol_locale(PROGRAM, $seq.alpha);
+                    if( !e ) {
+                      error_msg(@seq, "no LOCALE defined as %qs", $seq.alpha);
+                    } else {
+                      auto& encoding = cbl_locale_of(e)->encoding;
+                      current.alpha_encoding(symbol_index(e), encoding);
+                    }
+                  }
+                  if( $seq.national ) {
+                    auto e = symbol_locale(PROGRAM, $seq.national);
+                    if( !e ) {
+                      error_msg(@seq, "no LOCALE defined as %qs", $seq.national);
+                    } else {
+                      auto& encoding = cbl_locale_of(e)->encoding;
+                      current.national_encoding(symbol_index(e), encoding);
+                    }
+                  }
                  }
                  ;
-collation_sequence:
-                program_kw collating SEQUENCE collating_sequences[seq]
+collating_sequence:
+                program_kw collating SEQUENCE char_class_locales[seq]
                  {
                    if( !current.collating_sequence($seq.alpha) ) {
                      error_msg(@seq, "collating sequence already defined as '%s'",
@@ -2529,20 +2568,20 @@ collation_sequence:
                  }
                  ;
  
-collating_sequences:
+char_class_locales:
                  is NAME[name] {
                    $$.alpha = $name;
                    $$.national = nullptr;
                  }
-        |       collating_ans { $$ = $1; }
+        |       coll_alphanats { $$ = $1; }
                  ;
-collating_ans:  collating_an[encoding] {
-                  $$ = collating_an_t();
+coll_alphanats: coll_alphanat[encoding] {
+                  $$ = coll_alphanat_t();
                    const char **pname =
                      $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
                    *pname = $encoding.name;
                  }
-        |       collating_ans collating_an[encoding]
+        |       coll_alphanats coll_alphanat[encoding]
                  {
                    const char **pname =
                      $encoding.token == ALPHANUMERIC? &$$.alpha : &$$.national;
@@ -2553,7 +2592,7 @@ collating_ans:  collating_an[encoding] {
                    *pname = $encoding.name;
                  }
                  ;
-collating_an:   for alphanational is locale_phrase[locale] {
+coll_alphanat:  for alphanational is locale_phrase[locale] {
                    $$.token = $alphanational;
                    $$.name = $locale.name;
                    if( ! $locale.name ) {
@@ -2568,7 +2607,6 @@ collating_an:   for alphanational is locale_phrase[locale] {
                                        keyword_str($$.token),
                                        locale_name);
                    }
-                  warn_msg(@locale, "LOCALE phrase ignored");
                  }
                  ;             
  
@@ -2643,9 +2681,20 @@ special_name:   dev_mnemonic
                  {
                    symbol_decimal_point_set(',');
                  }
-        |       LOCALE NAME is locale_spec[spec] {
-                  current.locale($NAME, $spec);
-                  cbl_unimplementedw("sorry, unimplemented: LOCALE %qs", $spec);
+        |       LOCALE NAME is locale_spec[spec]
+                {
+                  cbl_locale_t locale($NAME, $spec);
+                  if( locale.encoding == no_encoding_e ) {
+                    error_msg(@NAME, "invalid iconv LOCALE name %qs", $spec);
+                    YYERROR;
+                  }
+                  if( locale.encoding == UTF8_e ) {
+                    cbl_unimplemented("UTF-8");
+                    YYERROR; 
+                  }
+                  if( ! current.locale_add(locale) ) {
+                    error_msg(@NAME, "%qs already defined as LOCALE name", $NAME);
+                  }
                  }
                  ;
          |       upsi
@@ -2655,6 +2704,8 @@ special_name:   dev_mnemonic
                  }
                  ;
  locale_spec:    NAME    { $$ = $1; }
+        |       UTF_8   { static char s[] ="UTF-8";  $$ = s; }
+        |       UTF_16  { static char s[] ="UTF-16"; $$ = s; }
          |       LITERAL { $$ = string_of($1); }
  
                  ;
@@ -2746,14 +2797,16 @@ device_name:    SYSIN           { $$.token = SYSIN; $$.id = SYSIN_e; }
  alphabet_name:  STANDARD_ALPHABET  { $$ = alphabet_add(@1, CP1252_e); }
          |       NATIVE             { $$ = alphabet_add(@1, EBCDIC_e); }
          |       EBCDIC             { $$ = alphabet_add(@1, EBCDIC_e); }
-        |       LOCALE ctx_name
+        |       LOCALE locale_name[name]
                  {
-                  auto e = symbol_alphabet(PROGRAM, $ctx_name);
+                  auto e = symbol_locale(PROGRAM, $name);                  
                    if( !e ) {
-                    error_msg(@ctx_name, "no such ALPHABET %qs", $ctx_name);
-                    YYERROR;
-                  }
-                  $$ = cbl_alphabet_of(e);
+                    dbgmsg("no such LOCALE yet %s", $name);
+                    cbl_locale_t locale($name); // locale is named but not defined 
+                    e = symbol_locale_add(PROGRAM, &locale); 
+                  } 
+                  cbl_alphabet_t alphabet( @name, symbol_index(e), $name);
+                  $$ = alphabet_add(alphabet);
                  }
          |       alphabet_seqs
                  {
@@ -3592,7 +3645,7 @@ const_value:    cce_expr
  
  value78:        literalism
                  {
-                  cbl_field_data_t data = {};
+                  cbl_field_data_t data;
                   data.capacity = capacity_cast(strlen($1.data));
                    data.initial = $1.data;
                    $$.encoding = $1.encoding;
@@ -3600,13 +3653,15 @@ value78:        literalism
                  }
          |       const_value
                  {
-                  cbl_field_data_t data = {};
+                  cbl_field_data_t data;
                   data = build_real (float128_type_node, $1);
+                  $$.encoding = current_encoding('A');
                    $$.data = new cbl_field_data_t(data);
                  }
          |       reserved_value[value]
                  {
                   const auto field = constant_of(constant_index($value));
+                  $$.encoding = current_encoding('A');
                    $$.data = new cbl_field_data_t(field->data);
                  }
  
@@ -3638,6 +3693,7 @@ data_descr1:    level_name
                    field.type = FldLiteralN;
                   field.data = build_real (float128_type_node, $const_value);
                    field.data.initial = string_of($const_value);
+                  field.codeset.set();
  
                    if( !cdf_value(field.name, cdfval_t($const_value)) ) {
                      error_msg(@1, "%s was defined by CDF", field.name);
@@ -3674,13 +3730,12 @@ data_descr1:    level_name
                    if( !cdf_value(field.name, $lit.data) ) {
                      error_msg(@1, "%s was defined by CDF", field.name);
                    }
-                  if( ! field.codeset.valid() ) {
-                    if( ! field.codeset.set(field.codeset.standard_internal.type) ) {
-                      error_msg(@lit, "CONSTANT inconsistent with encoding %s",
-                                cbl_alphabet_t::encoding_str(field.codeset.encoding));
-                    }
+                  if( ! field.codeset.set() ) {
+                    error_msg(@lit, "CONSTANT inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field.codeset.encoding));
                    }
-                  value_encoding_check(@lit, $1, $lit.encoding);
+                  
+                  value_encoding_check(@lit, $1);
                  }
          |       level_name CONSTANT is_global FROM NAME
                  {
@@ -3718,6 +3773,7 @@ data_descr1:    level_name
                    } else {
                      field.type = FldLiteralN;
                      field.data.initial = string_of(field.data.value_of());
+                    field.codeset.set($data.encoding);
                      if( !cdf_value(field.name, field.as_integer()) ) {
                        yywarn("%s was defined by CDF", field.name);
                      }
@@ -3975,6 +4031,15 @@ data_descr1:    level_name
                    // Verify VALUE
                    $field->report_invalid_initial_value(@data_clauses);
  
+                  bool numerical =
+                      $field->type == FldNumericDisplay || is_numeric($field);
+
+                  if( $field->data.initial && ! numerical ) {
+                    if( normal_value_e == cbl_figconst_of($field->data.initial) ) {
+                      value_encoding_check(@data_clauses, $field);
+                    }
+                  }
+
                    // verify REDEFINES
                    const auto parent = parent_of($field);
                    if( parent && $field->level == parent->level ) {
@@ -4287,14 +4352,16 @@ picture_clause: PIC signed nps[fore] nines nps[aft]
                    if( field->data.initial != NULL ) {
                      if( 0 < field->data.capacity &&
                              field->data.capacity < uint32_t($size) ) {
-                      auto p = blank_pad_initial( field->data.initial,
-                                                  field->data.capacity, $size );
+                      auto p = blank_pad_initial(field->data.initial,
+                                                 field->data.capacity, $size );
                        if( !p ) YYERROR;
                        field->data.initial = p;
                      }
                    }
  
-                  field->data.capacity = $size;
+                  charmap_t *charmap = 
+                                    __gg__get_charmap(field->codeset.encoding);
+                  field->data.capacity = $size * charmap->stride();
                    field->data.picture = NULL;
  
                    if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s",
@@ -4708,14 +4775,23 @@ usage_clause1:  usage BIT
  
  value_clause:   VALUE all LITERAL[lit] {
                    cbl_field_t *field = current_field();
-                  if( ! field->codeset.set($lit.encoding) ) {
-                    error_msg(@lit, "VALUE inconsistent with encoding %s",
-                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
+
+                  if( $lit.prefix[0] ) { // not the default encoding
+                    if( ! field->codeset.set($lit.encoding) ) {
+                      error_msg(@lit, "VALUE inconsistent with encoding %s",
+                                cbl_alphabet_t::encoding_str(field->codeset.encoding));
+                    }
+                  } else {
+                    field->codeset.set();
                    }
+
+                  if( field->codeset.encoding != $lit.encoding ) {
+                    error_msg(@lit, "PICTURE inconsistent with VALUE %s'%s'",
+                              $lit.prefix, $lit.data);
+                  }
+
                    field->data.initial  = $lit.data;
                    field->attr |= literal_attr($lit.prefix);
-                  // The __gg__initialize_data routine needs to know that VALUE is a
-                  // quoted literal. This is critical for NumericEdited variables
                    field->attr |= quoted_e;
  
                    if( field->data.capacity == 0 ) {
@@ -4732,7 +4808,6 @@ value_clause:   VALUE all LITERAL[lit] {
                        }
                      }
                    }
-                  value_encoding_check(@lit, field, $lit.encoding);
                  }
          |       VALUE all cce_expr[value] {
                    cbl_field_t *field = current_field();
@@ -4761,11 +4836,9 @@ value_clause:   VALUE all LITERAL[lit] {
          |       VALUE all reserved_value[value]
                  {
                    cbl_field_t *field = current_field();
-                  if( ! field->codeset.valid() ) {
-                    if( ! field->codeset.set(field->codeset.standard_internal.type) ) {
-                      error_msg(@value, "VALUE inconsistent with encoding %s",
-                                cbl_alphabet_t::encoding_str(field->codeset.encoding));
-                    }
+                  if( ! field->codeset.set() ) {
+                    error_msg(@value, "VALUE inconsistent with encoding %s",
+                              cbl_alphabet_t::encoding_str(field->codeset.encoding));
                    }
                    if( $value != NULLS ) {
                      auto fig = constant_of(constant_index($value));
@@ -5017,6 +5090,7 @@ typedef_clause: is TYPEDEF strong
                      error_msg(@2, "%s %s IS TYPEDEF must be level 01",
                             field->level_str(), field->name);
                    }
+                  field->codeset.set();
                    field->attr |= typedef_e;
                    if( $strong ) field->attr |= strongdef_e;
                    if( ! current.typedef_add(field) ) {
@@ -7007,6 +7081,8 @@ context_word:   APPLY                   { static char s[] ="APPLY";
                                           $$ = s; } // LOCK MODE clause
          |       MULTIPLE               { static char s[] ="MULTIPLE";
                                           $$ = s; } // LOCK ON phrase
+        |       NAT                    { static char s[] ="NAT";
+                                         $$ = s; } // CONVERT function
          |       NEAREST_AWAY_FROM_ZERO { static char s[] ="NEAREST-AWAY-FROM-ZERO";
                                           $$ = s; } // INTERMEDIATE ROUNDING clause and ROUNDED phrase
          |       NEAREST_EVEN           { static char s[] ="NEAREST-EVEN";
@@ -8544,7 +8620,7 @@ advance_by:     scalar lines { $$ = $1; } /* BUG: should accept reference */
                     * number of lines is negative.  So, we use the
                     * negative Number Of The Beast as a PAGE flag.
                     */
-                  $$ = new_reference( new_literal("-666") );
+                  $$ = new_reference( new_literal(xstrdup("-666")) );
                  }
          |       device_name { $$ = new_reference(literally_one); }
                  ;
@@ -8601,7 +8677,33 @@ io_invalid:     INVALID key {
  
  delete:         delete_impl  end_delete
          |       delete_cond  end_delete
+        |       delete_file  end_delete
                  ;
+delete_file:    DELETE delete_file_body[stmt] delete_error[err] {
+                  if( ! $err.on_error )  parser_file_delete_on_exception($stmt);
+                  if( ! $err.not_error ) parser_file_delete_not_exception($stmt);
+                  parser_file_delete_end($stmt);
+                  current.declaratives_evaluate();
+                }
+delete_file_body:
+                FILE_KW override filenames retry_phrase {
+                  $$ = label_add(@$, LblXml, uniq_label("xfile"));
+                  xml_statements.push($$);
+                  statement_begin(@$, DELETE);
+                  std::vector<cbl_file_t*>
+                      filenames($filenames->files.begin(),
+                                $filenames->files.end() );
+                  parser_file_delete_file( $$, filenames);
+                }
+                ;
+retry_phrase:   %empty
+        |       RETRY expr TIMES
+        |       FOR expr  SECONDS
+        |       FOREVER {
+                  cbl_unimplemented("DELETE FILE RETRY");
+                }
+                ;
+
  delete_impl:    DELETE delete_body[file]
                  {
                    file_delete_args.call_parser_file_delete(true);
@@ -8634,6 +8736,63 @@ delete_body:    filename[file] record
                    $$ = $file;
                  }
                  ;
+
+delete_error:   %empty %prec DELETE {
+                  $$.on_error = $$.not_error = nullptr;
+                }
+        |       delete_excepts %prec DELETE
+                ;
+delete_excepts:     delete_except[a] statements %prec DELETE
+                {
+                  assert( $a.on_error || $a.not_error );
+                  assert( ! ($a.on_error && $a.not_error) );
+                  $$ = $a;
+                }
+        |       delete_excepts[a] delete_except[b] statements %prec DELETE
+                {
+                  if( $a.on_error && $a.not_error ) {
+                    error_msg(@1, "too many ON ERROR clauses");
+                    YYERROR;
+                  }
+                  // "ON" and "NOT ON" could be reversed, but not duplicated.
+                  if( $a.on_error && $b.on_error ) {
+                    error_msg(@1, "duplicate ON ERROR clauses");
+                    YYERROR;
+                  }
+                  if( $a.not_error && $b.not_error ) {
+                    error_msg(@1, "duplicate NOT ON ERROR clauses");
+                    YYERROR;
+                  }
+                  $$ = $a;
+                  if( $$.on_error ) {
+                    assert($b.not_error);
+                    $$.not_error = $b.not_error;
+                  } else {
+                assert($b.on_error);
+                    $$.on_error = $b.on_error;
+                  }
+                }
+                ;
+delete_except:  EXCEPTION
+                {
+                  auto xml_stmt = xml_statements.top();
+                  // The value of the pointer no longer matters, only NULL or not. 
+                  $$.on_error = $$.not_error = nullptr;
+                  switch($1) {
+                  case EXCEPTION:
+                    $$.on_error = xml_stmt;
+                    parser_file_delete_on_exception(xml_stmt);
+                    break;
+                  case NOT:
+                    $$.not_error = xml_stmt;
+                    parser_file_delete_not_exception(xml_stmt);
+                    break;
+                  default:
+                    gcc_unreachable();
+                  }
+                }
+                ;
+
  end_delete:     %empty %prec DELETE
          |       END_DELETE
                  ;
@@ -10536,7 +10695,9 @@ function_udf:   FUNCTION_UDF '(' arg_list[args] ')' {
                                     cbl_ffi_arg_t actual(param.crv, ar);
                                     return actual;
                                   } );
-                 auto name = new_literal(strlen(L->name), L->name, quoted_e);
+                  // Pretend hex-encoded because that means use verbatim.
+                  auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
+                  auto name = new_literal(strlen(L->name), L->name, attr);
                   ast_call( @1, name, $$, args.size(), args.data(), NULL, NULL, true );
                  }
          |       FUNCTION_UDF_0 {
@@ -10547,8 +10708,11 @@ function_udf:   FUNCTION_UDF '(' arg_list[args] ')' {
                   const auto returning = cbl_field_of(symbol_at(L->returning));
                    $$ = new_temporary_clone(returning);
                   $$->data.initial = returning->name; // user's name for the field
-
-                  auto name = new_literal(strlen(L->name), L->name, quoted_e);
+                  cbl_field_attr_t call_attr
+                                 = (cbl_field_attr_t)(quoted_e|hex_encoded_e);
+                  cbl_field_t *name = new_literal(strlen(L->name),
+                                                  L->name,
+                                                  call_attr);
                    ast_call( @1, name, $$, narg, args, NULL, NULL, true );
                  }
                  ;
@@ -11135,6 +11299,18 @@ subst_input:    anycase first_last varg[v1] varg[v2] {
                  }
                  ;
  
+locale_name:    NAME
+                {
+                  auto e = symbol_locale(PROGRAM, $NAME);
+                  if( !e ) {
+                    error_msg(@NAME, "no such SPECIAL-NAMES LOCALE: %qs", $NAME);
+                    YYERROR;
+                  }
+                  $$ = const_cast<char*>(
+                           __gg__encoding_iconv_name(cbl_locale_of(e)->encoding) );
+                }
+                ;
+
  intrinsic_locale:
                  LOCALE_COMPARE '(' varg[r1] varg[r2]  ')'
                  {
@@ -11143,11 +11319,12 @@ intrinsic_locale:
                    cbl_refer_t dummy = {};
                    if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &dummy) ) YYERROR;
                  }
-        |       LOCALE_COMPARE '(' varg[r1] varg[r2] varg[r3] ')'
+        |       LOCALE_COMPARE '(' varg[r1] varg[r2] locale_name ')'
                  {
                    location_set(@1);
                    $$ = new_alphanumeric();
-                  if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, $r3) ) YYERROR;
+                  cbl_refer_t locale(new_literal($locale_name));
+                  if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &locale) ) YYERROR;
                  }
  
          |       LOCALE_DATE '(' varg[r1]  ')'
@@ -11453,6 +11630,10 @@ optional:       %empty   { $$ = false; }
          |       OPTIONAL { $$ = true;  }
                  ;
  
+override:      %empty   { $$ = false; }
+       |       OVERRIDE { $$ = true; }
+               ;
+
  program_kw:     %empty
          |       PROGRAM_kw
                  ;
@@ -11900,6 +12081,7 @@ void ast_call( const YYLTYPE& loc, cbl_refer_t name, const cbl_refer_t& returnin
    if( is_literal(name.field) ) {
      cbl_field_t called = { FldLiteralA, quoted_e | constant_e,
                             name.field->data, 77 };
+    called.attr |= name.field->attr;
      snprintf(called.name, sizeof(called.name), "_%s", name.field->data.initial);
      name.field = cbl_field_of(symbol_field_add(PROGRAM, &called));
      symbol_field_location(field_index(name.field), loc);
@@ -13030,13 +13212,13 @@ struct expand_group : public std::list<cbl_refer_t> {
  };
  
  
-static const char * initial_default_value;
-       const char * wsclear() { return initial_default_value; }
+static const uint32_t * initial_default_value;
+       const uint32_t * wsclear() { return initial_default_value; }
  
  void
-wsclear( char ch ) {
-  static char byte = ch;
-  initial_default_value = &byte;
+wsclear( uint32_t i ) {
+  static uint32_t init_val = i;
+  initial_default_value = &init_val;
    current.program_needs_initial();
  }
  
@@ -13558,16 +13740,16 @@ literal_t::set( const cbl_field_t * field ) {
  
  literal_t&
  literal_t::set_prefix( const char *input, size_t len ) {
-  encoding = current_encoding('A');
+  encoding = current_encoding(display_encoding_e);
    assert(len < sizeof(prefix));
    std::fill(prefix, prefix + sizeof(prefix), '\0');
    std::transform(input, input + len, prefix, toupper);
    switch(prefix[0]) {
    case '\0': case 'Z':
-    encoding = current_encoding('A');
+    encoding = current_encoding(display_encoding_e);
      break;
    case 'N':
-    encoding = current_encoding('N');
+    encoding = current_encoding(national_encoding_e);
      if( 'X' == prefix[1] ) {
        cbl_unimplemented("NX literals");
      }
@@ -13583,7 +13765,7 @@ literal_t::set_prefix( const char *input, size_t len ) {
    default:
      gcc_unreachable();
    }
-  assert(encoding <= iconv_YU_e);
+  assert(valid_encoding(encoding));
    return *this;
  }
  
@@ -13608,8 +13790,8 @@ literal_attr( const char prefix[] ) {
      case 'X':
        switch(prefix[0]) {
        case 'B': return cbl_field_attr_t(hex_encoded_e | bool_encoded_e);
-      case 'N': 
-      case 'U': cbl_unimplemented("National"); return none_e;
+      case 'N': cbl_unimplemented("Hexadecimal National"); return none_e;
+      case 'U': cbl_unimplemented("Hexadecimal Unicode");  return none_e;
        }
        break;
      }
diff --git a/gcc/cobol/parse_ante.h b/gcc/cobol/parse_ante.h

index 1fbc8f51774612e5670a0fa3619d480d68537d16..99c9cefae46578c383f9306ab608a0bbef28ff70 100644 (file)
--- a/gcc/cobol/parse_ante.h
+++ b/gcc/cobol/parse_ante.h
@@ -273,38 +273,11 @@ static inline char * dequote( char input[] ) {
  static const char *
  name_of( cbl_field_t *field ) {
    assert(field);
-  // Because this can be called after .initial has been converted to the
-  // field->codeset.encoding, we have to undo that.  There may be some danger
-  // associated with returning a static.  I don't actually know. -- RJD.
-  static size_t static_length = 0;
-  static char * static_buffer = nullptr;
-  
-  if( field->data.initial == nullptr ) return field->name;
-
-  if( field->name[0] == '_' )
-    {
-    // Make a copy of .initial
-    if( static_length < field->data.capacity+1 )
-      {
-      static_length = field->data.capacity+1;
-      static_buffer = static_cast<char *>(xrealloc(static_buffer,
-                                                   static_length));
-      memcpy(static_buffer, field->data.initial, field->data.capacity);
-      static_buffer[field->data.capacity] = '\0';
-      }
-    // Convert it from ->encoding to DEFAULT_CHARMAP_SOURCE
-    size_t charsout;
-    char *converted =  __gg__iconverter(field->codeset.encoding,
-                                        DEFAULT_CHARMAP_SOURCE,
-                                        field->data.initial,
-                                        field->data.capacity,
-                                        &charsout );
-    memcpy(static_buffer, converted, charsout);
-    static_buffer[charsout] = '\0';
-    }
-
+  if( field->data.initial == nullptr ) {
+    return field->name;
+  }
    return field->name[0] == '_' && field->data.initial?
-    static_buffer : field->name;
+    field->data.original() : field->name;
  }
  
  static const char *
@@ -1337,6 +1310,7 @@ std::map<std::string, std::list<std::string>>
  
  class prog_descr_t {
    std::set<std::string> call_targets, subprograms;
+  std::set<cbl_locale_t> locales;
  public:
    std::set<function_descr_t> function_repository;
    size_t program_index;
@@ -1361,17 +1335,14 @@ public:
      } alpha, national;
      encoding_t() : national(EBCDIC_e) {}
    } alphabet;
-  struct locale_t {
-    cbl_name_t name; const char *os_name;
-    locale_t() : name(""), os_name(nullptr) {}
-    locale_t(const cbl_name_t name, const char *os_name)
-      : name(""), os_name(os_name) {
-      if( name ) {
-        bool ok = namcpy(YYLTYPE(), this->name, name);
-        gcc_assert(ok);
-      }
-    }
-  } locale;
+  
+  bool locale_add( const cbl_locale_t& locale ) {
+    auto e = symbol_locale_add(program_index, &locale);
+    assert(e);
+    auto p = locales.insert(locale);
+    return p.second;
+  }
+      
    cbl_options_t options;
  
    explicit prog_descr_t( size_t isymbol )
@@ -1904,7 +1875,14 @@ static class current_t {
      return program.alphabet.alpha.encoding;
    }
    cbl_encoding_t  national_encoding() const {
-    if( programs.empty() ) return EBCDIC_e;
+    cbl_encoding_t when_empty = EBCDIC_e;
+    char *alternate = getenv("NATIONAL");
+    if( alternate )
+      {
+      when_empty = __gg__encoding_iconv_type(alternate);
+      gcc_assert(when_empty);
+      }
+    if( programs.empty() ) return when_empty;
      const prog_descr_t& program = programs.top();
      return program.alphabet.national.encoding;
    }
@@ -1929,23 +1907,8 @@ static class current_t {
      return programs.top().options.default_round = mode;
    }
  
-  const char *
-  locale() {
-    return programs.empty()? NULL : programs.top().locale.os_name;
-  }
-  const char *
-  locale( const cbl_name_t name ) {
-    if( programs.empty() ) return NULL;
-    const prog_descr_t::locale_t& locale = programs.top().locale;
-    return 0 == strcmp(name, locale.name)? locale.name : NULL;
-  }
-  const prog_descr_t::locale_t&
-  locale( const cbl_name_t name, const char os_name[] ) {
-    if( programs.empty() ) {
-      static prog_descr_t::locale_t empty;
-      return empty;
-    }
-    return programs.top().locale = prog_descr_t::locale_t(name, os_name);
+  bool locale_add( const cbl_locale_t& locale ) {
+    return programs.top().locale_add(locale);
    }
  
    bool new_program ( const YYLTYPE& loc, cbl_label_type_t type,
@@ -2296,11 +2259,13 @@ add_debugging_declarative( const cbl_label_t * label ) {
    }
  }
  
-cbl_options_t current_options() {
+cbl_options_t
+current_options() {
    return current.options_paragraph;
  }
  
-cbl_encoding_t current_encoding( char a_or_n ) {
+cbl_encoding_t
+current_encoding( char a_or_n ) {
    cbl_encoding_t retval;
    switch(a_or_n) {
    case 'A':
@@ -2316,14 +2281,17 @@ cbl_encoding_t current_encoding( char a_or_n ) {
    return retval;
  }
  
-size_t current_program_index() {
+size_t
+current_program_index() {
    return current.program()? current.program_index() : 0;
  }
  
-cbl_label_t * current_section() {
+cbl_label_t *
+current_section() {
    return current.section();
  }
-cbl_label_t * current_paragraph() {
+cbl_label_t *
+current_paragraph() {
    return current.paragraph();
  }
  
@@ -2402,8 +2370,13 @@ literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r );
  static bool
  is_integer_literal( const cbl_field_t *field ) {
    if( field->type == FldLiteralN ) {
-    const char *initial = field->data.initial;
-
+    size_t nchar;
+    const char *initial = __gg__iconverter(field->codeset.encoding,
+                                           DEFAULT_SOURCE_ENCODING,
+                                           field->data.initial,
+                                           strlen(field->data.initial),
+                                           &nchar);
+    assert(strlen(initial) == nchar);
      switch( *initial ) {
      case '-': case '+': ++initial;
      }
@@ -2982,16 +2955,28 @@ blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) {
    return p;
  }
  
+/*
+ * When cbl_field_t::internalize is called, its data.initial value has been
+ * set, but nothing has been done to it.  It is encoded according to the source
+ * code.  internalize() converts data.initial to the field's encoding. 
+ *
+ * If syntax used was was PIC VALUE, in that order, then PIC set the field's
+ * encoding, and the VALUE clause can verify that its encoding matches.  If the
+ * order was VALUE PIC, the value leaves the encoding uninitialized unless the
+ * value string bore an encoding prefix.  When PIC is processed, codeset_t::set
+ * allows it to set the encoding only if it's either uninitialized, or the PIC
+ * encoding matches the existing one set by VALUE.  In no event does one
+ * override the other; they must agree.
+ *
+ * internalize() fails if data.initial cannot be converted to the field's
+ * encoding.
+ */
  static void
-value_encoding_check( const YYLTYPE& loc, cbl_field_t *field, cbl_encoding_t encoding ) {
+value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) {
    if( ! field->internalize() ) {
      error_msg(loc, "inconsistent string literal encoding for '%s'",
                field->data.initial);
    }
-  if( encoding != field->codeset.encoding ) {
-    warn_msg(loc, "VALUE encoded as %qs for data item encoded as %qs",
-             __gg__encoding_iconv_name(encoding), field->codeset.name());
-  }
  }
  
  #pragma GCC diagnostic push
@@ -3046,12 +3031,16 @@ file_add( YYLTYPE loc, cbl_file_t *file ) {
  
  
  static cbl_alphabet_t *
-alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) {
-  cbl_alphabet_t alphabet(loc, encoding);
+alphabet_add( const cbl_alphabet_t& alphabet ) {
    symbol_elem_t *e = symbol_alphabet_add(PROGRAM, &alphabet);
    assert(e);
    return cbl_alphabet_of(e);
  }
+static cbl_alphabet_t *
+alphabet_add( const YYLTYPE& loc, cbl_encoding_t encoding ) {
+  cbl_alphabet_t alphabet(loc, encoding);
+  return alphabet_add(alphabet);
+}
  
  // The current field always exists in the symbol table, even if it's incomplete.
  static cbl_field_t *
@@ -3302,8 +3291,9 @@ data_division_ready() {
    static size_t nsymbol = 0;
    if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) {
      if( ! literally_one ) {
-      literally_one = new_literal("1");
-      literally_zero = new_literal("0");
+      // Use strdup so cbl_field_t::internalize can free them if need be.
+      literally_one = new_literal(xstrdup("1"));
+      literally_zero = new_literal(xstrdup("0"));
      }
    }
  
diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l

index 07aa76dbbf268e10c8a182b3ad92a57bb259a2ae..643d099f3899332936becfca7ffb5d8032bfec64 100644 (file)
--- a/gcc/cobol/scan.l
+++ b/gcc/cobol/scan.l
@@ -1801,8 +1801,8 @@ B-SHIFT-RC
                                   if( elem->type == SymField ) {
                                     auto f = cbl_field_of(elem);
                                     if( f->type == FldLiteralA && f->has_attr(constant_e) ) {
-                                     type = date_time_fmt(f->data.initial);
-                                     yylval.string = xstrdup(f->data.initial);
+                                     type = date_time_fmt(f->data.original());
+                                     yylval.string = xstrdup(f->data.original());
                                     }
                                   } else {
                                     yylval.string = xstrdup(yytext);
diff --git a/gcc/cobol/show_parse.h b/gcc/cobol/show_parse.h

index e1a8cb21b6f7647f911178cc933c546cea0665a0..7945e904573dd26c28490e49281c3185d04ca8fb 100644 (file)
--- a/gcc/cobol/show_parse.h
+++ b/gcc/cobol/show_parse.h
@@ -140,7 +140,13 @@ extern bool cursor_at_sol;
                  fprintf(stderr, "%s", (b).field->name); \
                  if( (b).field->type == FldLiteralA || (b).field->type == FldLiteralN ) \
                      { \
-                    fprintf(stderr, " \"%s\"", (b).field->data.initial); \
+                    size_t nbytes; \
+                    const char *literal = __gg__iconverter((b).field->codeset.encoding, \
+                                                           DEFAULT_SOURCE_ENCODING, \
+                                                           (b).field->data.initial, \
+                                                           strlen((b).field->data.initial), \
+                                                           &nbytes); \
+                    fprintf(stderr, " \"%s\"", literal); \
                      } \
                  else \
                      { \
diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc

index 2a299ceee3c3e8975924937560131afb03d05927..07dc0e65f14ba1370761d2b99f2fbc8ead8e54fe 100644 (file)
--- a/gcc/cobol/symbols.cc
+++ b/gcc/cobol/symbols.cc
@@ -293,7 +293,7 @@ elementize( const cbl_field_t& field ) {
    // Dubner did the following because he didn't feel like creating yet another
    // cbl_field_t constructor that included the hardcoded encoding for the
    // global special registers.
-  sym.elem.field.codeset.encoding = iconv_CP1252_e;
+  sym.elem.field.codeset.set();
    return sym;
  }
  
@@ -511,6 +511,9 @@ symbol_elem_cmp( const void *K, const void *E )
    case SymSpecial:
      return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1;
      break;
+  case SymLocale:
+    return strcasecmp(k->elem.locale.name, e->elem.locale.name);
+    break;
    case SymAlphabet:
      return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name);
      break;
@@ -676,6 +679,22 @@ symbol_special( size_t program, const char name[] )
    return symbol_at(p->second);
  }
  
+struct symbol_elem_t *
+symbol_locale( size_t program, const char name[] )
+{
+  cbl_locale_t locale(name);
+  assert(strlen(name) < sizeof locale.name);
+  strcpy(locale.name, name);
+
+  struct symbol_elem_t key(SymLocale, program), *e;
+  key.elem.locale = locale;
+
+  e = static_cast<struct symbol_elem_t *>(lfind( &key, symbols.elems,
+                                                 &symbols.nelem, sizeof(key),
+                                                 symbol_elem_cmp ) );
+  return e;
+}
+
  struct symbol_elem_t *
  symbol_alphabet( size_t program, const char name[] )
  {
@@ -1510,11 +1529,11 @@ field_str( const cbl_field_t *field ) {
          {
          // Apparently we need to trace back the meaning of data.literal for
          // field::type == FldNumericDisplay
-        enc_from = DEFAULT_CHARMAP_SOURCE;
+        enc_from = DEFAULT_SOURCE_ENCODING;
          }
  
        init = __gg__iconverter(enc_from,
-                              DEFAULT_CHARMAP_SOURCE,
+                              DEFAULT_SOURCE_ENCODING,
                                false_data,
                                field->data.capacity,
                                &charsout);
@@ -1522,12 +1541,12 @@ field_str( const cbl_field_t *field ) {
        auto eoinit = init + strlen(init);
        char *s = xasprintf("'%s'", init);
  
-      // No NUL within the initial data. 
+      // No NUL within the initial data.
        auto ok = std::none_of( init, eoinit,
                                []( char ch ) { return ch == '\0'; } );
        assert(ok);
  
-      // If any of the init are unprintable, provide a hex version. 
+      // If any of the init are unprintable, provide a hex version.
        if( ! std::all_of(init, eoinit, fisprint) ) {
          if( is_elementary(field->type) && field->type != FldPointer ) {
            const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
@@ -1663,7 +1682,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
  ////  // Define alphabets for codegen.
  ////  const cbl_alphabet_t *alphabet = nullptr;
  ////  bool supported = true;
-////  
+////
  ////  std::for_each( symbols_begin(program), symbols_end(),
  ////                 [&alphabet, &supported]( const auto& sym ) {
  ////                   if( sym.type == SymAlphabet ) {
@@ -1679,7 +1698,7 @@ symbols_alphabet_set( size_t program, const char name[]) {
  ////    cbl_unimplemented("alphabet %qs (as %qs)", alphabet->name, encoding);
  ////    return false;
  ////  }
-////  
+////
  ////  // Set collation sequence before parser_symbol_add.`
  ////  if( name ) {
  ////    symbol_elem_t *e = symbol_alphabet(program, name);
@@ -1906,38 +1925,46 @@ symbols_update( size_t first, bool parsed_ok ) {
        }
      }
  
-    if( ! field->codeset.valid() ) {
-      switch(field->type) {
-      case FldForward:
-      case FldInvalid:
-        gcc_unreachable();
-      case FldAlphaEdited:
-      case FldAlphanumeric:
-      case FldClass:
-      case FldDisplay:
-      case FldGroup:
-      case FldLiteralA:
-      case FldNumericDisplay:
-      case FldNumericEdited:
+    if( ! field->codeset.consistent() ) {
+      if( ! field->codeset.valid() ) {
+        switch(field->type) {
+        case FldForward:
+        case FldInvalid:
+          gcc_unreachable();
+        case FldAlphaEdited:
+        case FldAlphanumeric:
+        case FldClass:
+        case FldDisplay:
+        case FldGroup:
+        case FldLiteralA:
+        case FldLiteralN:
+        case FldNumericDisplay:
+        case FldNumericEdited:
+          if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
+            error_msg(symbol_field_location(field_index(field)),
+                      "internal: %qs encoding not defined", field->name);
+          }
+          break;
+        case FldConditional:
+        case FldFloat:
+        case FldIndex:
+        case FldNumericBin5:
+        case FldNumericBinary:
+        case FldPacked:
+        case FldPointer:
+        case FldSwitch:
+          break;
+        }
+      } else {
          if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
            error_msg(symbol_field_location(field_index(field)),
-                   "internal: %qs encoding not defined", field->name);
+                    "internal: %qs encoding %qs inconsistent",
+                    field->name,
+                    cbl_alphabet_t::encoding_str(field->codeset.encoding) );
          }
-        break;
-      case FldConditional:
-      case FldFloat:
-      case FldIndex:
-      case FldLiteralN:
-      case FldNumericBin5:
-      case FldNumericBinary:
-      case FldPacked:
-      case FldPointer:
-      case FldSwitch:
-        break;
        }
      }
  
-
      assert( ! field->is_typedef() );
  
      if( parsed_ok ) parser_symbol_add(field);
@@ -2541,6 +2568,13 @@ symbol_file_add( size_t program, cbl_file_t *file ) {
    return e;
  }
  
+symbol_elem_t *
+symbol_locale_add( size_t program, const cbl_locale_t *locale ) {
+  symbol_elem_t sym{ SymLocale, program };
+  sym.elem.locale = *locale;
+  return symbol_add(&sym);
+}
+
  symbol_elem_t *
  symbol_alphabet_add( size_t program, const cbl_alphabet_t *alphabet ) {
    symbol_elem_t sym{ SymAlphabet, program };
@@ -3202,19 +3236,56 @@ constant_of( size_t isym )
    return field;
  }
  
+cbl_locale_t::cbl_locale_t( const cbl_name_t name, const char iconv_name[] ) {
+  gcc_assert(strlen(name) < sizeof this->name);
+  strcpy(this->name, name);
+
+  if( iconv_name ) {
+    encoding = __gg__encoding_iconv_type(iconv_name);
+
+    strcpy(collation, "C");
+    // If the iconv_name is prefixed by langauge_COUNTRY (e.g. en_US), capture that.
+    auto pend = iconv_name + strlen(iconv_name);
+    auto p = std::find(iconv_name, pend, '.');
+    if( p < pend ) {
+      auto pend2 = std::copy(iconv_name, p, collation);
+      std::fill(pend2, collation + sizeof(collation), '\0');
+      iconv_name = ++p;
+    }
+    encoding = __gg__encoding_iconv_type(iconv_name);
+  }
+}
+
+cbl_alphabet_t::cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name )
+  : loc(loc)
+  , locale(locale)
+  , low_index(0)
+  , high_index(255)
+  , last_index(0)
+{
+  if( locale >  0 ) {
+    encoding = cbl_locale_of(symbol_at(locale))->encoding;
+  }
+  memset(collation_sequence, 0xFF, sizeof(collation_sequence));
+  if( name ) { // from Special-Names collation_sequence
+    assert(strlen(name) < sizeof(cbl_name_t));
+    strcpy(this->name, name);
+  }
+}
+
  /*
   * As parsed, the alphabet reflects the encoding of the source code.  If the
   * program uses a different encoding for alphanumeric, convert the alphabet to
- * that. 
- * 
+ * that.
+ *
   * Because a custom alphabet is rare and occurs at most only once per program,
   * we don't attempt to avoid re-encoding.  "Conversion" of ASCII to ASCII is at
- * most 256 calls to iconv(3).  
+ * most 256 calls to iconv(3).
   */
  void
  cbl_alphabet_t::reencode()  {
  
-  const unsigned char * const pend = alphabet + sizeof(alphabet);
+  const unsigned char * const pend = collation_sequence + sizeof(collation_sequence);
    std::vector<char> tgt(256, (char)0xFF);
  
    /*  Keep copies of low_index and last_index for use in run-time as LOW-VALUE
@@ -3230,13 +3301,14 @@ cbl_alphabet_t::reencode()  {
     * a custom alphabet are from NIST, which of course are ASCII.
     */
    const char *fromcode = __gg__encoding_iconv_name(CP1252_e);
-  const char *tocode = __gg__encoding_iconv_name(current_encoding('A'));
+  const char *tocode =
+              __gg__encoding_iconv_name(current_encoding(display_encoding_e));
    iconv_t cd = iconv_open(tocode, fromcode);
-  
+
  #if optimal_reencode
    if( fromcode == tocode ) { // semantically
      tgt.resize(0);
-    return tgt; // Return empty vector; caller copies zero bytes.  
+    return tgt; // Return empty vector; caller copies zero bytes.
    }
  #endif
  
@@ -3247,14 +3319,14 @@ cbl_alphabet_t::reencode()  {
     * that letter in the alphanumeric encoding, and set its collation position
     * in that alphabet.
     */
-  for( const unsigned char *p = alphabet; p < pend; p++ ) {
+  for( const unsigned char *p = collation_sequence; p < pend; p++ ) {
      if( *p == 0xFF ) continue;
-    unsigned char ch = p - alphabet;
+    unsigned char ch = p - collation_sequence;
      unsigned char pos[8] = {};
      size_t  inbytesleft = 1, outbytesleft = sizeof(pos);
      char *inbuf = reinterpret_cast<char*>(&ch),
          *outbuf = reinterpret_cast<char*>(pos);
-    
+
      size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
  
      if( n == size_t(-1) ) {
@@ -3273,7 +3345,7 @@ cbl_alphabet_t::reencode()  {
                  fromcode, ch, ch, n, tocode);
        continue;
      }
-    
+
      if( ch == low_index ) {
        low_index = pos[0];
      }
@@ -3283,21 +3355,21 @@ cbl_alphabet_t::reencode()  {
      if( ch == high_index ) {
       high_index = pos[0];
      }
-    
+
      tgt.at(pos[0]) = *p;
    }
-  
-  std::copy(tgt.begin(), tgt.end(), alphabet); 
+
+  std::copy(tgt.begin(), tgt.end(), collation_sequence);
  }
  
  bool
  cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high_value ) {
-  if( alphabet[ch] == 0xFF || alphabet[ch] == high_value) {
-    alphabet[ch] = high_value;
+  if( collation_sequence[ch] == 0xFF || collation_sequence[ch] == high_value) {
+    collation_sequence[ch] = high_value;
      last_index = ch;
      return true;
    }
-  auto taken = alphabet[ch];
+  auto taken = collation_sequence[ch];
    error_msg(loc, "ALPHABET %s, character %<%c%> (X%'%x%') "
             "in position %d already defined at position %d",
             name,
@@ -3310,7 +3382,7 @@ cbl_alphabet_t::assign( const YYLTYPE& loc, unsigned char ch, unsigned char high
  void
  cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
    if( ch < 256 ) {
-    alphabet[ch] = alphabet[last_index];
+    collation_sequence[ch] = collation_sequence[last_index];
      if( ch == high_index ) high_index--;
      return;
    } // else it's a figurative constant ...
@@ -3323,20 +3395,20 @@ cbl_alphabet_t::also( const YYLTYPE& loc, size_t ch ) {
  
    // last_index is already set; use it as the "last value before ALSO"
    if( attr & low_value_e ) {
-    alphabet[0] = alphabet[last_index];
+    collation_sequence[0] = collation_sequence[last_index];
      return;
    }
    if( attr & high_value_e ) {
-    alphabet[high_index--] = alphabet[last_index];
+    collation_sequence[high_index--] = collation_sequence[last_index];
      return;
    }
    if( attr & (space_value_e|quote_value_e) ) {
      ch = field->data.initial[0];
-    alphabet[ch] = alphabet[last_index];
+    collation_sequence[ch] = collation_sequence[last_index];
      return;
    }
    if( attr & (zero_value_e) ) {
-    alphabet[0] = alphabet[last_index];
+    collation_sequence[0] = collation_sequence[last_index];
      error_msg(loc, "ALSO value '%s' is unknown", field->name);
      return;
    }
@@ -3448,18 +3520,33 @@ new_literal_add( const char initial[], uint32_t len,
      }
    else
      {
-    static char empty[2] = "\0";
      field = new_temporary_impl(FldLiteralA);
      field->attr |= attr;
-    field->data.initial = len > 0? initial : empty;
+
+    if(len == 0)
+      {
+      // This will cover UTF-32, should that arise.
+      size_t nbytes = 4;
+      char *init = static_cast<char *>(xmalloc(nbytes));
+      memset(init, 0, nbytes);
+      field->data.initial = init;
+      }
+    if(len)
+      {
+      char *init = static_cast<char *>(xmalloc(len+4));
+      memcpy(init, initial, len);
+      memset(init+len, 0, 4);
+      field->data.initial = init;
+      }
      field->data.capacity = len;
      }
  
    if( ! field->has_attr(hex_encoded_e) ) {
-    field->codeset.set(encoding);
-    if( ! field->internalize() ) {
-      ERROR_FIELD(field, "inconsistent string literal encoding for '%s'", initial);
+    // If the literal bore a prefix, set the encoding,
+    if( encoding != cbl_field_t::codeset_t::source_encoding->type ) {
+      field->codeset.set(encoding);
      }
+    field->internalize();
    }
  
    static size_t literal_count = 1;
@@ -3595,6 +3682,14 @@ new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
  
  extern os_locale_t os_locale;
  
+const encodings_t cbl_field_t::codeset_t::source_encodings[2] = {
+  { false, iconv_UTF_8_e, "UTF-8" },
+  { true,  iconv_CP1252_e, "CP1252" },
+};
+const encodings_t * cbl_field_t::codeset_t::source_encoding = {
+  cbl_field_t::codeset_t::source_encodings
+};
+
  const encodings_t cbl_field_t::codeset_t::standard_internal = {
    true, iconv_CP1252_e, "CP1252"
  };
@@ -3603,7 +3698,7 @@ const encodings_t cbl_field_t::codeset_t::standard_internal = {
  cbl_field_t *
  new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) {
    const bool force_unsigned = type == FldNumericBin5 && ! is_signed;
-  
+
    if( ! initial && ! force_unsigned ) {
      assert( ! is_literal(type) ); // Literal type must have literal value.
      return temporaries.acquire(type, initial);
@@ -3719,29 +3814,26 @@ cbl_field_t::is_ascii() const {
   * never reverts.
   */
  
-static const char *
-guess_encoding() {
-  static const char *fromcode;
-
-  if( ! fromcode ) {
-    return fromcode = os_locale.assumed;
-  }
-
-  if( fromcode == os_locale.assumed ) {
-    fromcode = os_locale.codeset;
-    if( 0 != strcmp(fromcode, "C") ) { // anything but that
-      return fromcode;
-    }
-  }
-
-  return standard_internal.name;
-}
-
  const char *
  cbl_field_t::internalize() {
-  static const char *fromcode = guess_encoding();
+  /*  The purpose of this routine is to return a nul-terminated string which
+      is data.initial converted from the source-code characters to the
+      codeset.encoding characters.
+      
+      The contract between this routine and the routines that call it is that
+      for alphanumeric types, data.initial shall have the same number of
+      characters as will be needed to fill data.capacity.
+
+      Be aware that for PIC X(32) Z"foo", there are the characters "foo",
+      followed by a NUL, and then 28 spaces to fill it out.  It turns out that
+      iconv, given a character count of 32, converts all 32, including the
+      embedded NUL.  So, that case works even through strlen(initial) is
+      smaller than the length of initial, which is the same as capacity.
+  */
+
+  static const char *fromcode = codeset.source_encodings[0].name;
    static const size_t noconv = size_t(-1);
-  static std::map<std::string, iconv_t> tocodes;
+  static std::unordered_map<std::string, iconv_t> tocodes;
  
    if( ! codeset.valid() ) {
      dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
@@ -3769,20 +3861,33 @@ cbl_field_t::internalize() {
      assert(0 == strlen(data.initial));
      return data.initial;
    }
-  if( holds_ascii() && is_ascii() ) return data.initial;
+  if( holds_ascii() && is_ascii() ) {
+    if( type != FldNumericEdited ) {
+      if( ! data.initial_within_capacity() ) {
+        ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u",
+                    cbl_field_t::level_str(level), name, data.initial,
+                    strlen(data.initial), data.capacity );
+      }
+    }
+    return data.initial;
+  }
    assert(data.capacity > 0);
  
    // The final 2 bytes of the output are "!\0".  It's a debugging sentinel.
    size_t n;
    size_t inbytesleft = data.capacity;
    size_t outbytesleft = inbytesleft;
-  char *in  = const_cast<char*>(data.initial);
-  char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
    if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
      inbytesleft = strlen(data.initial);
    }
+  if( type == FldNumericEdited ) {
+    outbytesleft = inbytesleft;
+  }
    const unsigned int in_len = inbytesleft;
  
+  char *in  = const_cast<char*>(data.initial);
+  char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
+
    assert(fromcode != tocode);
  
    /*
@@ -3799,8 +3904,9 @@ cbl_field_t::internalize() {
  
    do {
      if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
-      if( fromcode == os_locale.assumed ) {
-        fromcode = standard_internal.name;
+      if( fromcode == codeset.source_encodings[0].name ) {
+        codeset.source_encoding = &codeset.source_encodings[1];
+        fromcode = codeset.source_encoding->name;
          tocodes.clear();
          cd = tocodes[toname] = iconv_open(tocode, fromcode);
          dbgmsg("%s: trying input encoding %s", __func__, fromcode);
@@ -3813,7 +3919,7 @@ cbl_field_t::internalize() {
  
    if( n == noconv ) {
      size_t i = in_len - inbytesleft;
-    yywarn("failed to encode %s %qs as %s (%zu of %u bytes left)",
+    yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)",
             fromcode, data.initial + i, tocode, inbytesleft, in_len);
      if( false ) return NULL;
      return data.initial;
@@ -3821,7 +3927,7 @@ cbl_field_t::internalize() {
  
    if( 0 < inbytesleft ) {
      // data.capacity + inbytesleft is not correct if the remaining portion has
-    // multibyte characters.  But the fact reamins that the VALUE is too big.
+    // multibyte characters.  But the fact remains that the VALUE is too big.
      ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u",
              cbl_field_t::level_str(level), name, data.initial,
              data.capacity + inbytesleft, data.capacity );
@@ -3829,7 +3935,7 @@ cbl_field_t::internalize() {
  
    // Replace data.initial only if iconv output differs.
    if( 0 != memcmp(data.initial, output, out - output) ) {
-    assert(out <= output + data.capacity);
+    assert(out <= output + data.capacity || type == FldNumericEdited);
      dbgmsg("%s: converted '%.*s' to %s",
                          __func__, data.capacity, data.initial, tocode);
      struct localspace_t {
@@ -3858,14 +3964,16 @@ cbl_field_t::internalize() {
        data.capacity = out - output; // trailing '!' will be overwritten
      }
      // Pad with trailing blanks, tacking a '!' on the end.
-    for( const char *eout = output + data.capacity; 
+    for( const char *eout = output + data.capacity;
           out < eout;
           out += spc.len ) {
        memcpy(out, spc.space, spc.len);
      }
-    out[0] = '!';
+    // Numeric literal strings may have leading zeros, making their length
+    // longer than their capacity.
+    out[0] = type == FldLiteralN? '\0' : '!';
      assert(out[1] == '\0');
-    free(const_cast<char*>(data.initial));
+    data.orig = data.initial;
      data.initial = output;
    } else {
      free(output);
diff --git a/gcc/cobol/symbols.h b/gcc/cobol/symbols.h

index 66fb2fd912ff175a9fe1f3728843a1801b09359d..6d29d060a0517544aebf3a26f6ee7f6c11886b83 100644 (file)
--- a/gcc/cobol/symbols.h
+++ b/gcc/cobol/symbols.h
@@ -224,6 +224,7 @@ enum symbol_type_t {
    SymAlphabet,
    SymFile,
    SymDataSection,
+  SymLocale, 
  };
  
  // The ISO specification says alphanumeric literals have a maximum length of
@@ -237,7 +238,7 @@ struct cbl_field_data_t {
    uint32_t capacity,            // allocated space
             digits;              // magnitude: total digits (or characters)
    int32_t  rdigits;             // digits to the right
-  const char *initial, *picture;
+  const char *orig, *initial, *picture;
  
    enum etc_type_t { val88_e, upsi_e, value_e } etc_type;
    const char *
@@ -268,6 +269,7 @@ struct cbl_field_data_t {
      , capacity(0)
      , digits(0)
      , rdigits(0)
+    , orig(0)
      , initial(0)
      , picture(0)
      , etc_type(value_e)
@@ -279,6 +281,7 @@ struct cbl_field_data_t {
      , capacity(capacity)
      , digits(0)
      , rdigits(0)
+    , orig(0)
      , initial(0)
      , picture(0)
      , etc_type(value_e)
@@ -293,6 +296,7 @@ struct cbl_field_data_t {
      , capacity(capacity)
      , digits(digits)
      , rdigits(rdigits)
+    , orig(0)
      , initial(initial)
      , picture(picture)
      , etc_type(value_e)
@@ -387,6 +391,12 @@ struct cbl_field_data_t {
      return valify();
    }
  
+  bool initial_within_capacity() const {
+    return initial[capacity] == '\0'
+      ||   initial[capacity] == '!';
+  }
+  const char *original() const { return orig? orig : initial; }
+
   protected:
    cbl_field_data_t& copy_self( const cbl_field_data_t& that ) {
      memsize = that.memsize;
@@ -531,7 +541,7 @@ struct cbl_field_t {
    uint32_t level;
    cbl_occurs_t occurs;
    struct codeset_t {
-    static const encodings_t standard_internal;
+    static const encodings_t standard_internal, source_encodings[2], *source_encoding;
      cbl_encoding_t encoding;
      size_t alphabet;  // unlikely
      explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e,
@@ -544,22 +554,26 @@ struct cbl_field_t {
          ||
          (alphabet != 0 && encoding == custom_encoding_e);
      }
+    bool consistent() const {
+      return valid() && ( encoding == current_encoding('A')
+                          ||
+                          encoding == current_encoding('N')
+                          ||
+                          encoding == UTF8_e );
+    }
      bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) {
-      assert(encoding <= iconv_YU_e);
+      assert(valid_encoding(encoding));
        if( ! valid() ) { // setting first time
          this->encoding = encoding;
          this->alphabet = alphabet;
          return valid();
        }
-      // DUBNER override.  Encoding has to change when
-      //  01 FOO VALUE ZERO.  Just 0 is okay; ZERO is not.
-          this->encoding = encoding;
        return this->encoding == encoding && this->alphabet == alphabet;
      }
      bool set( const char picture_fragment[] = nullptr) {
        if( ! picture_fragment ) {
-        cbl_encoding_t currenc = current_encoding('A');
-        bool retval = set(currenc);
+        cbl_encoding_t enc = current_encoding('A');
+        bool retval = set(enc);
          return retval;
        }
        size_t len = strlen(picture_fragment);
@@ -568,14 +582,15 @@ struct cbl_field_t {
                       frag.begin(), ftoupper);
        switch(frag[0]) { 
        case 'A': case 'X': case '9':
-        return set(current_encoding('A'));
+        return set(current_encoding(display_encoding_e));
        case 'N': case 'U': 
          if( std::all_of(frag.begin(), frag.end(),
                          [first = frag[0]]( char ch ) {
                            return first == ch;
                          } ) ) {
            // All N's indicates National; all U's indicates UTF-8.
-          auto enc = frag[0] == 'N'? current_encoding('N') : UTF8_e;
+          auto enc = frag[0] == 'N' ? current_encoding(national_encoding_e)
+                                    : UTF8_e;
            return set(enc);
          }
          return false; // They all must be the same. 
@@ -739,7 +754,7 @@ struct cbl_field_t {
    uint32_t size() const; // table capacity or capacity
  
    const char * pretty_name() const {
-    if( name[0] == '_' && data.initial ) return data.initial;
+    if( name[0] == '_' && data.original() ) return data.original();
      return name;
    }
    static const char * level_str(uint32_t level );
@@ -1185,6 +1200,13 @@ struct cbl_arith_error_t {
      cbl_label_addresses_t bottom;
  };
  
+struct cbl_delete_file_t {
+    cbl_label_addresses_t over;
+    cbl_label_addresses_t exception;
+    cbl_label_addresses_t no_exception;
+    cbl_label_addresses_t bottom;
+};
+
  struct cbl_compute_error_t {
      // This is an int.  The value is a cbl_compute_error_code_t
      tree compute_error_code;
@@ -1232,7 +1254,10 @@ struct cbl_label_t {
      
      // for parse_xml processing:
      struct cbl_xml_parse_t *xml_parse;
-    
+
+    // For parser_file_delete_file
+    struct cbl_delete_file_t *delete_file;
+
      } structs;
  
    bool is_function() const { return type == LblFunction; }
@@ -1525,6 +1550,19 @@ struct cbl_section_t {
    }
  };
  
+struct cbl_locale_t {
+  cbl_name_t name;
+  cbl_encoding_t encoding;
+  cbl_name_t collation;
+
+  explicit cbl_locale_t(const cbl_name_t name,
+                        const char iconv_name[] = nullptr );
+
+  bool operator<( const cbl_locale_t& that ) const {
+    return strcmp(name, that.name) < 0;
+  }
+};
+
  struct cbl_special_name_t {
    int token;
    enum special_name_t id;
@@ -1536,22 +1574,35 @@ struct cbl_special_name_t {
  char * hex_decode( const char text[] );
  
  /*
- * For a custom alphabet of single-byte encoding, cbl_alphabet_t::alphabet
+ * An alphabet may just name an encoding, which implies binary collation.
+ *
+ * An alphabet may reference a Special-Names LOCALE, which defines an encoding
+ * and a collation (perhaps by default).
+ * 
+ * During Special-Names parsing, an Alphabet may reference an as-yet undefined
+ * LOCALE with an as-yet unknown encoding. As a placeholder it inserts a named,
+ * undefined cbl_locale_t symbol, which the Alphabet references.  If that
+ * locale is never defined, the encoding remains unknown, resulting in an error
+ * diagnostic at the end of Special-Names.
+ *
+ * For a custom alphabet of single-byte encoding, cbl_alphabet_t::collation_sequence
   * holds the collation position of each encoded value.  
- * If 'A' sorts first (after LOW-VALUE), then alphabet['A'] == 1. 
- * If the encoding is ASCII,         then 'A' is  65 and alphabet[ 65] == 1.
- * If the encoding is EBCDIC CP1140, then 'A' is 193 and alphabet[193] == 1.
+ * If 'A' sorts first (after LOW-VALUE), then collation_sequence['A'] == 1. 
+ * If the encoding is ASCII,         then 'A' is  65 and collation_sequence[ 65] == 1.
+ * If the encoding is EBCDIC CP1140, then 'A' is 193 and collation_sequence[193] == 1.
   */
  struct cbl_alphabet_t {
    YYLTYPE loc;
    cbl_name_t name;
    cbl_encoding_t encoding;
-  unsigned char low_index, high_index, last_index, alphabet[256];
+  size_t locale;  // index to cbl_locale_t symbol
+  unsigned char low_index, high_index, last_index, collation_sequence[256];
    unsigned char low_char, high_char;
  
    cbl_alphabet_t()
      : loc { 1,1, 1,1 }
      , encoding(ASCII_e)
+    , locale(0)
      , low_index(0)
      , high_index(255)
      , last_index(0)
@@ -1559,12 +1610,13 @@ struct cbl_alphabet_t {
      , high_char(0)
    {
      memset(name, '\0', sizeof(name));
-    memset(alphabet, 0xFF, sizeof(alphabet));
+    memset(collation_sequence, 0xFF, sizeof(collation_sequence));
    }
  
    cbl_alphabet_t(const YYLTYPE& loc, cbl_encoding_t enc)
      : loc(loc)
      , encoding(enc)
+    , locale(0)
      , low_index(0)
      , high_index(255)
      , last_index(0)
@@ -1572,14 +1624,17 @@ struct cbl_alphabet_t {
      , high_char(0)
    {
      memset(name, '\0', sizeof(name));
-    memset(alphabet, 0xFF, sizeof(alphabet));
+    memset(collation_sequence, 0xFF, sizeof(collation_sequence));
    }
  
+  cbl_alphabet_t(const YYLTYPE& loc, size_t locale, cbl_name_t name );
+
    cbl_alphabet_t( const YYLTYPE& loc, const cbl_name_t name,
                    unsigned char low_index, unsigned char high_index,
-                  unsigned char alphabet[] )
+                  unsigned char collation_sequence[] )
      : loc(loc)
      , encoding(custom_encoding_e)
+    , locale(0)
      , low_index(low_index), high_index(high_index)
      , last_index(high_index)
      , low_char(low_index)
@@ -1587,21 +1642,23 @@ struct cbl_alphabet_t {
    {
      assert(strlen(name) < sizeof(this->name));
      strcpy(this->name, name);
-    std::copy(alphabet, alphabet + sizeof(this->alphabet), this->alphabet);
+    std::copy(collation_sequence,
+              collation_sequence + sizeof(this->collation_sequence),
+              this->collation_sequence);
    }
  
    unsigned char low_value() const {
-    return alphabet[low_index];
+    return collation_sequence[low_index];
    }
    unsigned char high_value() const {
-    return alphabet[high_index];
+    return collation_sequence[high_index];
    }
  
    void
    add_sequence( const YYLTYPE& loc, const unsigned char seq[] ) {
      if( low_index == 0 ) low_index = seq[0];
  
-    unsigned char last = last_index > 0? alphabet[last_index] + 1 : 0;
+    unsigned char last = last_index > 0? collation_sequence[last_index] + 1 : 0;
  
      for( const unsigned char *p = seq; !end_of_string(p); p++  ) {
        assign(loc, *p, last++);
@@ -1612,7 +1669,7 @@ struct cbl_alphabet_t {
    add_interval( const YYLTYPE& loc, unsigned char low, unsigned char high ) {
      if( low_index == 0 ) low_index = low;
  
-    unsigned char last = alphabet[last_index];
+    unsigned char last = collation_sequence[last_index];
  
      for( unsigned char ch = low; ch < high; ch++  ) {
        assign(loc, ch, last++);
@@ -1649,8 +1706,11 @@ struct cbl_alphabet_t {
                 "  0   1   2   3   4   5   6   7"
                "   8   9   A   B   C   C   E   F");
        unsigned int row = 0;
-      for( auto p = alphabet; p < alphabet + sizeof(alphabet); p++ ) {
-        if( (p - alphabet) % 16 == 0 ) fprintf(stderr, "\n%4X\t", row++);
+      for( auto p = collation_sequence;
+           p < collation_sequence + sizeof(collation_sequence); p++ ) {
+        if( (p - collation_sequence) % 16 == 0 ) {
+          fprintf(stderr, "\n%4X\t", row++);
+        }
          fprintf(stderr, "%3u ", *p);
        }
        fprintf(stderr, "\n");
@@ -1870,6 +1930,7 @@ struct symbol_elem_t {
      cbl_field_t        field;
      cbl_label_t        label;
      cbl_special_name_t special;
+    cbl_locale_t       locale;
      cbl_alphabet_t     alphabet;
      cbl_file_t         file;
      cbl_section_t      section;
@@ -1927,6 +1988,9 @@ struct symbol_elem_t {
      case SymSpecial:
        elem.special = that.elem.special;
        break;
+    case SymLocale:
+      elem.locale = that.elem.locale;
+      break;
      case SymAlphabet:
        elem.alphabet = that.elem.alphabet;
        break;
@@ -2092,6 +2156,18 @@ cbl_special_name_of( symbol_elem_t *e ) {
    return &e->elem.special;
  }
  
+static inline cbl_locale_t *
+cbl_locale_of( symbol_elem_t *e ) {
+  assert(e && e->type == SymLocale);
+  return &e->elem.locale;
+}
+
+static inline const cbl_locale_t *
+cbl_locale_of( const symbol_elem_t *e ) {
+  assert(e && e->type == SymLocale);
+  return &e->elem.locale;
+}
+
  static inline cbl_alphabet_t *
  cbl_alphabet_of( symbol_elem_t *e ) {
    assert(e && e->type == SymAlphabet);
@@ -2104,6 +2180,7 @@ cbl_alphabet_of( const symbol_elem_t *e ) {
    return &e->elem.alphabet;
  }
  
+
  static inline cbl_file_t *
  cbl_file_of( symbol_elem_t *e ) {
    assert(e && e->type == SymFile);
@@ -2477,6 +2554,7 @@ struct symbol_elem_t * symbol_literalA( size_t program, const char name[] );
  
  struct cbl_special_name_t * symbol_special( special_name_t id );
  struct symbol_elem_t * symbol_special( size_t program, const char name[] );
+struct symbol_elem_t * symbol_locale( size_t program, const char name[] );
  struct symbol_elem_t * symbol_alphabet( size_t program, const char name[] );
  
  struct symbol_elem_t * symbol_file( size_t program, const char name[] );
@@ -2524,6 +2602,7 @@ cbl_label_t *   symbol_label_add( size_t program,
  cbl_label_t *   symbol_program_add( size_t program, cbl_label_t *input );
  symbol_elem_t * symbol_special_add( size_t program,
                                     cbl_special_name_t *special );
+symbol_elem_t * symbol_locale_add( size_t program, const cbl_locale_t *locale );
  symbol_elem_t * symbol_alphabet_add( size_t program,
                                      const cbl_alphabet_t *alphabet );
  symbol_elem_t * symbol_file_add( size_t program,
@@ -2548,8 +2627,8 @@ static inline size_t upsi_register() {
    return symbol_index(symbol_field(0,0,"UPSI-0"));
  }
  
-void wsclear( char ch);
-const char *wsclear();
+void wsclear( uint32_t ch);
+const uint32_t *wsclear();
  
  enum cbl_call_convention_t {
    cbl_call_verbatim_e = 'V',
diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc

index 96159876f14c5532a73ef223861b3ad57ba57643..0e6ec8cfb246c9229d99a30bcf68961a64fd9cd8 100644 (file)
--- a/gcc/cobol/util.cc
+++ b/gcc/cobol/util.cc
@@ -271,6 +271,8 @@ symbol_type_str( enum symbol_type_t type )
          return "SymLabel";
      case SymSpecial:
          return "SymSpecial";
+    case SymLocale:
+        return "SymLocale";
      case SymAlphabet:
          return "SymAlphabet";
      case SymFile:
@@ -1094,28 +1096,18 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
    if( has_attr(all_alpha_e) ) {
      bool alpha_value = fig != zero_value_e;
      
-    // In order to check for all alphabetic characters, we have to convert
-    // data.initial back to ASCII:
-
-    size_t outchars;
-    char *initial = __gg__iconverter(codeset.encoding,
-                                     DEFAULT_CHARMAP_SOURCE,
-                                     data.initial,
-                                     data.capacity,
-                                     &outchars);
-
      if( fig == normal_value_e ) {
-      alpha_value = std::all_of( initial,
-                                 initial +
-                                 data.capacity,
-                                 []( char ch ) {
-                                   return ISSPACE(ch) ||
-                                     ISPUNCT(ch) ||
-                                     ISALPHA(ch); } );
+      alpha_value = std::none_of( data.initial,
+                                  data.initial +
+                                  data.capacity,
+                                  []( char ch ) {
+                                    return 
+                                      ISPUNCT(ch) ||
+                                      ISDIGIT(ch); } );
      }
      if( ! alpha_value ) {
        error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data",
-               name, fig == zero_value_e? cbl_figconst_str(fig) : initial);
+               name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial);
      }
    }
  
@@ -1315,7 +1307,7 @@ valid_move( const struct cbl_field_t *tgt, const struct cbl_field_t *src )
          size_t outcount;
          char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity));
          const char *in_asciip = __gg__iconverter( src->codeset.encoding,
-                                                  DEFAULT_CHARMAP_SOURCE,
+                                                  DEFAULT_SOURCE_ENCODING,
                                                    src->data.initial,
                                                    src->data.capacity,
                                                    &outcount );
@@ -2078,7 +2070,8 @@ cobol_lineno() {
  
  const char *
  cobol_filename() {
-  return input_filenames.empty()? input_filename_vestige : input_filenames.top().name;
+  return input_filenames.empty()?
+    input_filename_vestige : input_filenames.top().name;
  }
  
  void
diff --git a/libgcobol/charmaps.cc b/libgcobol/charmaps.cc

index 349c669aa7ca3b508d9840ed436320c092bfa614..d4084654319850293cd3e92719d0fb43a7f17eca 100644 (file)
--- a/libgcobol/charmaps.cc
+++ b/libgcobol/charmaps.cc
@@ -1376,7 +1376,7 @@ static encodings_t encodings[] = {
    { false, iconv_UTF_7_e, "UTF-7" },
    // Is UTF-8 supported??  "supported" means "recognized by parser_alphabet",
    // but UTF-8 is not a valid runtime encoding.
-  { false, iconv_UTF_8_e, "UTF-8" },  
+  { false, iconv_UTF_8_e, "UTF-8" },
    { false, iconv_UTF_16_e, "UTF-16" },
    { false, iconv_UTF_16BE_e, "UTF-16BE" },
    { false, iconv_UTF_16LE_e, "UTF-16LE" },
@@ -1439,10 +1439,20 @@ cbl_encoding_t
  __gg__encoding_iconv_type( const char *name ) {
    static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
  
+  char *slashless = strdup(name);
+  assert(slashless);
+  char *pslash = strchr(slashless, '/');
+  if( pslash )
+    {
+    *pslash = '\0';
+    }
+
    auto p = std::find_if( encodings, eoencodings,
-                         [name]( const encodings_t& elem ) {
-                           return strcmp(name, elem.name) == 0;
+                         [slashless]( const encodings_t& elem ) {
+                           return strcasecmp(slashless, elem.name) == 0;
                           } );
+  free(slashless);
+
    return p < eoencodings? p->type : no_encoding_e;
  }
  
@@ -1557,7 +1567,7 @@ __gg__get_charmap(cbl_encoding_t encoding)
  
    if( encoding == custom_encoding_e)
      {
-    encoding = DEFAULT_CHARMAP_SOURCE;
+    encoding = DEFAULT_SOURCE_ENCODING;
      }
  
    charmap_t *retval;
diff --git a/libgcobol/charmaps.h b/libgcobol/charmaps.h

index f35d033f910f030f0bd7dd1e2a3d64843d72070c..f48c063e60ba4c36eaa00369e39f48a5026343ac 100644 (file)
--- a/libgcobol/charmaps.h
+++ b/libgcobol/charmaps.h
@@ -228,13 +228,17 @@ char * __gg__iconverter(cbl_encoding_t from,
                          size_t length,
                          size_t *outlength);
  
-#define DEFAULT_CHARMAP_SOURCE (iconv_CP1252_e)
+#define DEFAULT_SOURCE_ENCODING (iconv_CP1252_e)
  
  class charmap_t
    {
    private:
      // This is the encoding of this character map
      cbl_encoding_t m_encoding;
+    bool m_is_valid;
+    bool m_is_big_endian;
+    bool m_has_bom = false;
+    int  m_stride; // Number of bytes between one character and the next
  
      enum
        {
@@ -246,32 +250,114 @@ class charmap_t
      // need be called but once for each ASCII value.
      std::unordered_map<int, int>m_map_of_encodings;
  
-    void determine_sign_type()
+  public:
+    explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
        {
-      if( mapped_character(ascii_0) & 0x80 )
+      // We are constructing a new charmap_t from an arbitrary encoding.  We
+      // need to figure out how wide it is, its endianness, whether or not
+      // it is EBCDIC-based, and so on.
+
+      // We do that by converting "0" to the target encoding, and we analyze
+      // what we get back.
+      
+      size_t outlength = 0;
+      const char challenge[] = "0";
+      const unsigned char *response = PTRCAST(unsigned char,
+                                   __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                                    m_encoding,
+                                                    challenge,
+                                                    1,
+                                                    &outlength));
+      unsigned char char_0 = 0x00;
+
+      m_is_valid = false;
+      m_has_bom  = false;
+      m_is_big_endian = false;
+
+      if( outlength == 1 )
          {
-        m_numeric_sign_type = sign_type_ebcdic;
+        m_stride = 1;
+        // This is our happy place:  A single-byte encoded character set.
+        char_0 = response[0];
          }
-      else
+      else if( outlength == 2 )
+        {
+        m_stride = 2;
+        if( response[0] )
+          {
+          char_0 = response[0];
+          }
+        else if( response[1] )
+          {
+          m_is_big_endian = true;
+          char_0 = response[1];
+          }
+        }
+      else if( outlength == 4 )
          {
+        // Check for the Byte Order Mark (BOM)
+        if( response[0] == 0xFF && response[1] == 0xFE )
+          {
+          m_stride = 2;
+          m_has_bom = true;
+          char_0 = response[2];
+          }
+        else if( response[0] == 0xFE && response[1] == 0xFF )
+          {
+          m_stride = 2;
+          m_has_bom = true;
+          m_is_big_endian = true;
+          char_0 = response[3];
+          }
+        else if( response[0] )
+          {
+          m_stride = 4;
+          char_0 = response[0];
+          }
+        else
+          {
+          m_stride = 4;
+          m_is_big_endian = true;
+          char_0 = response[3];
+          }
+        }
+      else if( outlength == 8 )
+        {
+        m_stride = 4;
+        if( response[0] == 0xFF && response[1] == 0xFE )
+          {
+          char_0 = response[4];
+          }
+        else if( response[0] == 0xFE && response[1] == 0xFF )
+          {
+          m_is_big_endian = true;
+          char_0 = response[7];
+          }
+        }
+
+      // With everything else established, we now check the zero character.
+      // We know about only 0x30 for ASCII and 0xF0 for EBCDIC.
+      if( char_0 == 0x30 )
+        {
+        m_is_valid = true;
          m_numeric_sign_type = sign_type_ascii;
          }
+      else if( char_0 == 0xF0 )
+        {
+        m_is_valid = true;
+        m_numeric_sign_type = sign_type_ebcdic;
+        }
        }
  
-  public:
-    explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
-      {
-      determine_sign_type();
-      }
-    explicit charmap_t(uint16_t e) : m_encoding(static_cast<cbl_encoding_t>(e))
-      {
-      determine_sign_type();
-      }
+    bool is_valid()      const{return m_is_valid     ;}
+    bool is_big_endian() const{return m_is_big_endian;}
+    bool has_bom()       const{return m_has_bom      ;}
+    int  stride()        const{return m_stride       ;}
  
      int mapped_character(int ch)
        {
        // The assumption is that anybody calling this routine is providing
-      // a single-byte character in the DEFAULT_CHARMAP_SOURCE encoding.  We
+      // a single-byte character in the DEFAULT_SOURCE_ENCODING encoding.  We
        // return the equivalent character in the m_encoding
        int retval;
        std::unordered_map<int, int>::const_iterator it =
@@ -284,7 +370,7 @@ class charmap_t
          {
          retval = 0;
          size_t outlength = 0;
-        const char *mapped = __gg__iconverter(DEFAULT_CHARMAP_SOURCE,
+        const char *mapped = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
                                                m_encoding,
                                                PTRCAST(char, &ch),
                                                1,
diff --git a/libgcobol/encodings.h b/libgcobol/encodings.h

index 51cc6c39287398709135641db8b9e9c2dd3dd6bc..37bcde305fc3332577fd076f07ed6bcaf7a32d3b 100644 (file)
--- a/libgcobol/encodings.h
+++ b/libgcobol/encodings.h
@@ -1195,6 +1195,11 @@ enum cbl_encoding_t {
    iconv_YU_e,
  };
  
+static inline bool
+valid_encoding( cbl_encoding_t enc ) {
+  return enc <= iconv_YU_e;
+}
+
  #define ASCII_e  iconv_ASCII_e   
  #define CP1252_e iconv_CP1252_e
  #define EBCDIC_e iconv_CP1140_e
diff --git a/libgcobol/gcobolio.h b/libgcobol/gcobolio.h

index 7a1c9ac07023e451df4c4ee94a395a53ec6c605d..14ef069ccb7339ef7e52de825f902e7a793d2e90 100644 (file)
--- a/libgcobol/gcobolio.h
+++ b/libgcobol/gcobolio.h
@@ -81,6 +81,7 @@ enum cblc_file_prior_op_t
    file_op_rewrite,
    file_op_delete,
    file_op_close,
+  file_op_remove,
    };
  
  /* end implementation details */
diff --git a/libgcobol/gfileio.cc b/libgcobol/gfileio.cc

index 16d75b0f186411b6795925f19861d9b74751277d..7c01f39a8c066fb018a1c46b4baba69a9a9becbe 100644 (file)
--- a/libgcobol/gfileio.cc
+++ b/libgcobol/gfileio.cc
@@ -191,9 +191,10 @@ handle_errno(cblc_file_t *file, const char *function, const char *msg)
  
  static
  char *
-get_filename( const cblc_file_t *file,
-              int is_quoted)
+get_filename( const cblc_file_t *file)
    {
+  bool is_quoted = !!(file->flags & file_name_quoted_e);
+
    static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
    static char *fname = static_cast<char *>(malloc(MINIMUM_ALLOCATION_SIZE));
    massert(fname);
@@ -1151,6 +1152,80 @@ __io__file_delete(cblc_file_t *file, bool is_random)
      }
    }
  
+static void
+__io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
+  {
+  // filename is the result of a strdup or malloc.  Because both FILE OPEN
+  // and FILE DELETE can establish or change a name, we free it here and
+  // replace it.  The same is true in FILE DELETE Format 2
+  free(file->filename);
+  file->filename = filename;
+  file->flags &= ~file_name_quoted_e;
+  file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
+  int erc;
+
+  // This code copied from reopen
+  const char *trimmed_name = get_filename(file);
+  if( !trimmed_name[0] )
+    {
+    bool all_spaces = true;
+    for(size_t i=0; i<strlen(file->filename); i++)
+      {
+      if( file->filename[i] != ascii_space )
+        {
+        all_spaces = false;
+        }
+      break;
+      }
+    if( all_spaces )
+      {
+      warnx("Warning: %s specified with a filename that is all spaces",
+            file->name);
+      file->io_status = FsNameError;    // "31"
+      goto done;
+      }
+
+    warnx(  "%s(): There is no environment variable named \"%s\"\n",
+            __func__,
+            file->filename);
+    file->io_status = FsNoFile;    // "35"
+    goto done;
+    }
+  // trimmed_name is now the file system name of the file to be removed.
+
+  // If the file is open, we flag that with "41"
+  if( file->file_pointer )
+    {
+    file->io_status = FsIsOpen;    // "41"
+    goto done;
+    }
+
+  // There's been a lot of buildup.  We can now try to remove the file:
+  errno = 0;
+  erc = remove(trimmed_name);
+  if( erc == 0 )
+    {
+    // All is copacetic.  There was a file, and now it's gone.
+    file->io_status = FsSuccess;    // "00"
+    }
+  else if( errno == ENOENT )
+    {
+    // The file didn't exist.  
+    file->io_status = FsUnavail;    // "05"
+    }
+  else
+    {
+    // We have some other kind of error.  Lack of credentials, or whatever.
+    file->io_status = FsErrno;    // 
+    goto done;
+    }
+
+  file->prior_op = file_op_remove;
+  done:
+  file->errnum = errno;
+  establish_status(file, -1);
+  }
+
  static void
  indexed_file_start( cblc_file_t *file,
                      int relop,
@@ -4115,7 +4190,7 @@ __gg__file_reopen(cblc_file_t *file, int mode_char)
    // Stash the mode_char for later analysis during READ and WRITE operations
    file->mode_char = mode_char;
    char *trimmed_name;
-  trimmed_name = get_filename(file, !!(file->flags & file_name_quoted_e));
+  trimmed_name = get_filename(file);
    if( !trimmed_name[0] )
      {
      bool all_spaces = true;
@@ -4353,8 +4428,10 @@ __io__file_open(cblc_file_t *file,
      }
    else
      {
-    // filename is the result of a strdup or malloc.  We will free() it at
-    // file close time.
+    // filename is the result of a strdup or malloc.  Because both FILE OPEN
+    // and FILE DELETE can establish or change a name, we free it here and
+    // replace it.  The same is true in FILE DELETE Format 2
+    free(file->filename);
      file->filename = filename;
      file->flags &= ~file_name_quoted_e;
      file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
@@ -4492,6 +4569,9 @@ public:
                              size_t length, bool is_random );
    typedef void (delete_t)( cblc_file_t *file,
                            bool is_random );
+  typedef void (remove_t)( cblc_file_t *file,
+                          char *filename,
+                          int is_quoted);
  
    open_t      *Open;
    close_t     *Close;
@@ -4500,6 +4580,7 @@ public:
    write_t     *Write;
    rewrite_t   *Rewrite;
    delete_t    *Delete;
+  remove_t    *Remove;
  
    gcobol_io_t()
      : Open(NULL)
@@ -4509,15 +4590,17 @@ public:
      , Write(NULL)
      , Rewrite(NULL)
      , Delete(NULL)
+    , Remove(NULL)
    {}
  
-  gcobol_io_t(  open_t      *Open,
+  gcobol_io_t(   open_t      *Open,
                   close_t     *Close,
                   start_t     *Start,
                   read_t      *Read,
                   write_t     *Write,
                   rewrite_t   *Rewrite,
-                 delete_t    *Delete )
+                 delete_t    *Delete,
+                 remove_t    *Remove)
      : Open(Open)
      , Close(Close)
      , Start(Start)
@@ -4525,6 +4608,7 @@ public:
      , Write(Write)
      , Rewrite(Rewrite)
      , Delete(Delete)
+    , Remove(Remove)
    {}
  
  #if FILE_IO_IMPLEMENTED
@@ -4552,7 +4636,8 @@ gcobol_fileops() {
                            __io__file_read,
                            __io__file_write,
                            __io__file_rewrite,
-                          __io__file_delete );
+                          __io__file_delete,
+                          __io__file_remove);
  }
  
  /*
@@ -4657,9 +4742,19 @@ extern "C"
  void
  __gg__file_delete(cblc_file_t *file, bool is_random)
    {
+    // DELETE FILE Format 1 - deletes a record.
      gcobol_io_t *functions = gcobol_io_funcs();
      functions->Delete(file, is_random);
    }
+extern "C"
+
+void
+__gg__file_remove(cblc_file_t *file, char *name, int is_quoted)
+  {
+    // DELETE FILE Format 2 - removes a file.
+    gcobol_io_t *functions = gcobol_io_funcs();
+    functions->Remove(file, name, is_quoted);
+  }
  
  /* end interface functions */
  
diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc

index 49dee6e3aef4040358f11b48a528fc6b58217b4f..bb03f629d0a95aea3292ee10e38550c8e84c67c2 100644 (file)
--- a/libgcobol/intrinsic.cc
+++ b/libgcobol/intrinsic.cc
@@ -55,7 +55,6 @@
  #include "libgcobol.h"
  #include "charmaps.h"
  
-
  #pragma GCC diagnostic ignored "-Wformat-truncation"
  
  #define JD_OF_1601_01_02 2305812.5
@@ -576,7 +575,7 @@ get_all_time( const cblc_field_t *dest, // needed for the target encoding
            ctm.day_of_year,
            ctm.ZZZZ);
    __gg__convert_encoding(PTRCAST(char, stime),
-                         DEFAULT_CHARMAP_SOURCE,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
    }
  
@@ -810,7 +809,6 @@ ftime_replace(char *dest,
    const char *src;
    bool saw_decimal_point = false;
    bool saw_plus_sign = false;
-  char decimal_point = __gg__get_decimal_point();
    static const int OFFSET_TO_YYYY           =  0;
    static const int OFFSET_TO_MM             =  4;
    static const int OFFSET_TO_DD             =  6;
@@ -826,18 +824,20 @@ ftime_replace(char *dest,
    static const int OFFSET_TO_DOY            = 34;
    static const int OFFSET_TO_ZZZZ           = 37;
  
-  int source_Y    = charmap_source->mapped_character(ascii_Y   );
-  int source_W    = charmap_source->mapped_character(ascii_W   );
-  int source_s    = charmap_source->mapped_character(ascii_s   );
-  int source_m    = charmap_source->mapped_character(ascii_m   );
-  int source_h    = charmap_source->mapped_character(ascii_h   );
-  int source_plus = charmap_source->mapped_character(ascii_plus);
-  int source_D    = charmap_source->mapped_character(ascii_D   );
-  int source_M    = charmap_source->mapped_character(ascii_M   );
+  unsigned int decimal_point =
+                   charmap_source->mapped_character(__gg__get_decimal_point());
+  unsigned int source_Y    = charmap_source->mapped_character(ascii_Y   );
+  unsigned int source_W    = charmap_source->mapped_character(ascii_W   );
+  unsigned int source_s    = charmap_source->mapped_character(ascii_s   );
+  unsigned int source_m    = charmap_source->mapped_character(ascii_m   );
+  unsigned int source_h    = charmap_source->mapped_character(ascii_h   );
+  unsigned int source_plus = charmap_source->mapped_character(ascii_plus);
+  unsigned int source_D    = charmap_source->mapped_character(ascii_D   );
+  unsigned int source_M    = charmap_source->mapped_character(ascii_M   );
  
    while( source < source_end && dest < dest_end )
      {
-    char fchar = *source;
+    unsigned char fchar = *source;
      if( fchar == source_Y )
        {
        // This can only be a YYYY
@@ -847,7 +847,7 @@ ftime_replace(char *dest,
        const char *p = source;
        while(p < source_end)
          {
-        if( *p++ == source_W )
+        if( (unsigned char)*p++ == source_W )
            {
            src = ftime + OFFSET_TO_ZZZZ;
            }
@@ -864,12 +864,12 @@ ftime_replace(char *dest,
      else if( fchar == source_D )
        {
        // It can be a D, DD or DDD
-      if( source[2] == source_D )
+      if( (unsigned char)source[2] == source_D )
          {
          ncount = 3;
          src = ftime + OFFSET_TO_DOY;
          }
-      else if( source[1] == source_D )
+      else if( (unsigned char)source[1] == source_D )
          {
          ncount = 2;
          src = ftime + OFFSET_TO_DD;
@@ -946,7 +946,7 @@ ftime_replace(char *dest,
        {
        // This indicates special processing for a variable number of 's'
        // characters
-      while(*source == 's' && dest < dest_end)
+      while((unsigned char)*source == source_s && dest < dest_end)
          {
          source += 1;
          *dest++ = *src++;
@@ -1279,7 +1279,7 @@ __gg__current_date(cblc_field_t *dest)
    char retval[DATE_STRING_BUFFER_SIZE];
    timespec_to_string(retval, tp);
    __gg__convert_encoding(PTRCAST(char, retval),
-                         DEFAULT_CHARMAP_SOURCE,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
    string_to_dest(dest, retval);
    }
@@ -2022,7 +2022,6 @@ __gg__max(cblc_field_t *dest,
          }
        }
  
-
      __gg__adjust_dest_size(dest, best_length);
      dest->type = FldAlphanumeric;
      assert(best_location);
@@ -2088,12 +2087,12 @@ __gg__lower_case( cblc_field_t *dest,
    __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                  length,
                                  from,
-                                DEFAULT_CHARMAP_SOURCE);
+                                DEFAULT_SOURCE_ENCODING);
    std::transform(dest->data, dest->data + dest_length, dest->data,
                   [](unsigned char c) { return std::tolower(c); });
    __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                  length,
-                                DEFAULT_CHARMAP_SOURCE,
+                                DEFAULT_SOURCE_ENCODING,
                                  to);
    }
  
@@ -2391,25 +2390,23 @@ numval( cblc_field_t *dest,
          size_t input_offset,
          size_t input_size)
    {
-  // Returns the one-based character position of a bad character
-  // returns zero if it is okay
+  // Returns the one-based character position of a bad character.
+  // Returns zero if it is okay.
+
+  // This routine works in ASCII space:
  
-  const char *p    = PTRCAST(char, (input->data + input_offset));
-  const char *pend =     p + input_size;
+  size_t nbytes;
+  const char *p = __gg__iconverter(input->encoding,
+                                   DEFAULT_SOURCE_ENCODING,
+                                   PTRCAST(char, input->data + input_offset),
+                                   input_size,
+                                   &nbytes);
+  const char *pend = p + input_size;
  
    int errpos = 0;
    __int128 retval = 0;
    int retval_rdigits = 0;
  
-  charmap_t *charmap = __gg__get_charmap(input->encoding);
-  unsigned char decimal_point
-                   = charmap->mapped_character(__gg__get_decimal_point());
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
-  int mapped_space = charmap->mapped_character(ascii_space);
-  int mapped_plus = charmap->mapped_character(ascii_plus);
-  int mapped_minus = charmap->mapped_character(ascii_minus);
-
    bool saw_digit= false;
    bool in_fraction  = false;
    bool leading_sign = false;
@@ -2437,31 +2434,31 @@ numval( cblc_field_t *dest,
        case SPACE1:
          // We tolerate spaces, and expect to end with a sign, digit,
          // or decimal point:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            continue;
            }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
            {
            leading_sign = true;
            state = SPACE2;
            break;
            }
-        if( ch == mapped_minus )
+        if( ch == ascii_minus )
            {
            leading_sign = true;
            is_negative  = true;
            state = SPACE2;
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            saw_digit = true;
            retval = ch & 0xF;
            state = DIGITS;
            break;
            }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
            {
            in_fraction = true;
            state = DIGITS;
@@ -2473,18 +2470,18 @@ numval( cblc_field_t *dest,
  
        case SPACE2:
          // We tolerate spaces, and expect to end with a digit or decimal point:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            saw_digit = true;
            retval = ch & 0xF;
            state = DIGITS;
            break;
            }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
            {
            in_fraction = true;
            state = DIGITS;
@@ -2499,7 +2496,7 @@ numval( cblc_field_t *dest,
          // end with a space, a sign, "DB" or "CR", or the the end of the string
          // It's a bit complicated
  
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            saw_digit = true;
            retval *= 10;
@@ -2510,43 +2507,43 @@ numval( cblc_field_t *dest,
              }
            break;
            }
-        if( ch == decimal_point && in_fraction )
+        if( ch == __gg__decimal_point && in_fraction )
            {
            // Only one decimal is allowed
            goto done;
            }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
            {
            in_fraction = true;
            break;
            }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            state = SPACE3;
            break;
            }
-        if( ch == mapped_plus && leading_sign)
+        if( ch == ascii_plus && leading_sign)
            {
            // We are allowed leading or trailing signs, but not both
            goto done;
            }
-        if( ch == mapped_minus && leading_sign)
+        if( ch == ascii_minus && leading_sign)
            {
            // We are allowed leading or trailing signs, but not both
            goto done;
            }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
            {
            state = SPACE4;
            break;
            }
-        if( ch == mapped_minus )
+        if( ch == ascii_minus )
            {
            is_negative = true;
            state = SPACE4;
            break;
            }
-        if( std::tolower(ch) == 'd' )
+        if( std::tolower(ch) == ascii_d )
            {
            if( leading_sign )
              {
@@ -2554,7 +2551,7 @@ numval( cblc_field_t *dest,
              }
            ch = *p++;
            errpos += 1;
-          if( p > pend || std::tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != ascii_b )
              {
              goto done;
              }
@@ -2562,7 +2559,7 @@ numval( cblc_field_t *dest,
            state = SPACE4;
            break;
            }
-        if( std::tolower(ch) == 'c' )
+        if( std::tolower(ch) == ascii_c )
            {
            if( leading_sign )
              {
@@ -2570,7 +2567,7 @@ numval( cblc_field_t *dest,
              }
            ch = *p++;
            errpos += 1;
-          if( p > pend || std::tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != ascii_r )
              {
              goto done;
              }
@@ -2584,26 +2581,26 @@ numval( cblc_field_t *dest,
  
        case SPACE3:
          // We tolerate spaces, or we end with a sign:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
-        if( ch == mapped_plus && leading_sign)
+        if( ch == ascii_plus && leading_sign)
            {
            // We are allowed leading or trailing signs, but not both
            goto done;
            }
-        if( ch == mapped_minus && leading_sign)
+        if( ch == ascii_minus && leading_sign)
            {
            // We are allowed leading or trailing signs, but not both
            goto done;
            }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
            {
            state = SPACE4;
            break;
            }
-        if( ch == mapped_minus )
+        if( ch == ascii_minus )
            {
            is_negative = true;
            state = SPACE4;
@@ -2617,7 +2614,7 @@ numval( cblc_field_t *dest,
              }
            ch = *p++;
            errpos += 1;
-          if( p > pend || std::tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != ascii_b )
              {
              goto done;
              }
@@ -2633,7 +2630,7 @@ numval( cblc_field_t *dest,
              }
            ch = *p++;
            errpos += 1;
-          if( p > pend || std::tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != ascii_r )
              {
              goto done;
              }
@@ -2644,7 +2641,7 @@ numval( cblc_field_t *dest,
          goto done;
          break;
        case SPACE4:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
@@ -2658,7 +2655,7 @@ numval( cblc_field_t *dest,
      }
    else if( p == pend )
      {
-    // If we got to the end without seeing adigit, we need to bump the
+    // If we got to the end without seeing a digit, we need to bump the
      // error pointer:
      errpos += 1;
      }
@@ -2696,7 +2693,15 @@ numval_c( cblc_field_t *dest,
    {
    size_t errcode = 0;
  
-  char *pstart = PTRCAST(char, (src->data+src_offset));
+//  char *pstart = PTRCAST(char, (src->data+src_offset));
+  size_t nbytes;
+  const char *converted = __gg__iconverter(src->encoding,
+                                  DEFAULT_SOURCE_ENCODING,
+                                  PTRCAST(char, src->data+src_offset),
+                                  src_size,
+                                  &nbytes);
+  char *pstart = strdup(converted);
+  massert(pstart);
    char *pend   = pstart + src_size;
    char *p      = pstart;
  
@@ -2704,45 +2709,41 @@ numval_c( cblc_field_t *dest,
    int sign = 0;
    int rdigits = 0;
    int rdigit_bump = 0;
-  charmap_t *charmap = __gg__get_charmap(src->encoding);
-  unsigned char decimal_point
-                   = charmap->mapped_character(__gg__get_decimal_point());
-  unsigned char decimal_separator
-                   = charmap->mapped_character(__gg__get_decimal_separator());
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
-  int mapped_space = charmap->mapped_character(ascii_space);
-  int mapped_plus = charmap->mapped_character(ascii_plus);
-  int mapped_minus = charmap->mapped_character(ascii_minus);
-  int mapped_C = charmap->mapped_character(ascii_C);
-  int mapped_R = charmap->mapped_character(ascii_R);
-  int mapped_D = charmap->mapped_character(ascii_D);
-  int mapped_B = charmap->mapped_character(ascii_B);
-  int mapped_c = charmap->mapped_character(ascii_c);
-  int mapped_r = charmap->mapped_character(ascii_r);
-  int mapped_d = charmap->mapped_character(ascii_d);
-  int mapped_b = charmap->mapped_character(ascii_b);
+  unsigned char decimal_point = __gg__decimal_point;
+  unsigned char decimal_separator = __gg__decimal_separator;
+
+  char *currency_in_ascii;
  
    char *currency_start;
    char *currency_end;
    if( crcy )
      {
-    currency_start = PTRCAST(char, (crcy->data+crcy_offset));
-    currency_end   = currency_start + crcy_size;
+    converted = __gg__iconverter(crcy->encoding,
+                                 DEFAULT_SOURCE_ENCODING,
+                                 PTRCAST(char, crcy->data+crcy_offset),
+                                 crcy_size,
+                                 &nbytes);
+    currency_in_ascii = static_cast<char*>(malloc(nbytes+1));
+    massert(currency_in_ascii);
+    strcpy(currency_in_ascii, converted);
      }
    else
      {
-    currency_start = __gg__get_default_currency_string();
-    currency_end   = currency_start + strlen(currency_start);
+    // This is in ascii
+    currency_in_ascii = strdup(__gg__get_default_currency_string());
+    massert(currency_in_ascii);
      }
+  currency_start = currency_in_ascii;
+  currency_end   = currency_start + strlen(currency_start);
+
    char *pcurrency = currency_start;
    // Trim off spaces from the currency:
-  while( *pcurrency == mapped_space && pcurrency < currency_end )
+  while( *pcurrency == ascii_space && pcurrency < currency_end )
      {
      pcurrency += 1;
      }
  
-  while( *(currency_end-1) == mapped_space && currency_end > currency_start )
+  while( *(currency_end-1) == ascii_space && currency_end > currency_start )
      {
      currency_end -= 1;
      }
@@ -2769,12 +2770,12 @@ numval_c( cblc_field_t *dest,
        {
        case first_space   :
          // Eat up spaces, if any, and then dispatch on the first non-space:
-        if( ch != mapped_space )
+        if( ch != ascii_space )
            {
            // ch can now be a plus, a minus, a digit, or the first character
            // of the currency string
-          if(   ch == mapped_plus
-             || ch == mapped_minus )
+          if(   ch == ascii_plus
+             || ch == ascii_minus )
              {
              state = first_sign;
              // Decrement to pointer in order to pick up the character again
@@ -2785,7 +2786,7 @@ numval_c( cblc_field_t *dest,
              state = currency;
              p -= 1;
              }
-          else if(  (ch >= mapped_0 && ch <= mapped_9)
+          else if(  (ch >= ascii_0 && ch <= ascii_9)
                      || ch == decimal_point )
              {
              state = digits;
@@ -2805,7 +2806,7 @@ numval_c( cblc_field_t *dest,
  
        case first_sign    :
          // We know the character is a plus or a minus:
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
            {
            sign = 1;
            state = second_space;
@@ -2820,14 +2821,14 @@ numval_c( cblc_field_t *dest,
        case second_space :
          // Eat up spaces, if any.  This segment has to end with a currency or
          // a digit:
-        if( ch != mapped_space )
+        if( ch != ascii_space )
            {
            if( ch == *pcurrency )
              {
              state = currency;
              p -= 1;
              }
-          else if(  (ch >= mapped_0 && ch <= mapped_9)
+          else if(  (ch >= ascii_0 && ch <= ascii_9)
                      || ch == decimal_point )
              {
              state = digits;
@@ -2868,9 +2869,9 @@ numval_c( cblc_field_t *dest,
  
        case before_digits :
          // Eat up spaces, if any.  This segment has to end with a digit
-        if( ch != mapped_space )
+        if( ch != ascii_space )
            {
-          if(  (ch >= mapped_0 && ch <= mapped_9)
+          if(  (ch >= ascii_0 && ch <= ascii_9)
                 || ch == decimal_point )
              {
              state = digits;
@@ -2890,7 +2891,7 @@ numval_c( cblc_field_t *dest,
        case digits     :
          // The only thing allowed here are digits, decimal points, and
          // decimal separators
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            // We have a digit.
            rdigits += rdigit_bump;
@@ -2923,14 +2924,14 @@ numval_c( cblc_field_t *dest,
  
        case after_digits  :
          // after digits, the only valid things are spaces, plus, minus, D, or C
-        if( ch != charmap->mapped_character(ascii_space) )
+        if( ch != ascii_space )
            {
-          if(       ch == mapped_plus
-                 || ch == mapped_minus
-                 || ch == mapped_D
-                 || ch == mapped_d
-                 || ch == mapped_C
-                 || ch == mapped_c )
+          if(       ch == ascii_plus
+                 || ch == ascii_minus
+                 || ch == ascii_D
+                 || ch == ascii_d
+                 || ch == ascii_C
+                 || ch == ascii_c )
              {
              state = second_sign;
              p -= 1;
@@ -2945,24 +2946,24 @@ numval_c( cblc_field_t *dest,
            errcode = p - pstart;
            p = pend;
            }
-        if( ch == mapped_plus )
+        if( ch == ascii_plus )
            {
            sign = 1;
            }
-        else if( ch == mapped_minus )
+        else if( ch == ascii_minus )
            {
            sign = -1;
            }
-        else if(    (ch == mapped_D || ch == mapped_d)
+        else if(    (ch == ascii_D || ch == ascii_d)
                      && p < pend
-                    && (*p == mapped_B || *p == mapped_b) )
+                    && (*p == ascii_B || *p == ascii_b) )
            {
            sign = -1;
            p += 1;
            }
-        else if(    (ch == mapped_C || ch == mapped_c)
+        else if(    (ch == ascii_C || ch == ascii_c)
                      && p < pend
-                    && (*p == mapped_R || *p == mapped_r) )
+                    && (*p == ascii_R || *p == ascii_r) )
            {
            sign = -1;
            p += 1;
@@ -2972,7 +2973,7 @@ numval_c( cblc_field_t *dest,
  
        case final_space   :
          // There should be only spaces until the end
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            continue;
            }
@@ -3003,6 +3004,8 @@ numval_c( cblc_field_t *dest,
                              truncation_e,
                              NULL);
      }
+  free(currency_in_ascii);
+  free(pstart);
    return (int)errcode;
    }
  
@@ -3910,12 +3913,12 @@ __gg__upper_case( cblc_field_t *dest,
    __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                  length,
                                  from,
-                                DEFAULT_CHARMAP_SOURCE);
+                                DEFAULT_SOURCE_ENCODING);
    std::transform(dest->data, dest->data + dest_length, dest->data,
                   [](unsigned char c) { return std::toupper(c); });
    __gg__convert_encoding_length(PTRCAST(char, dest->data),
                                  length,
-                                DEFAULT_CHARMAP_SOURCE,
+                                DEFAULT_SOURCE_ENCODING,
                                  to);
    }
  
@@ -3946,7 +3949,7 @@ __gg__when_compiled(cblc_field_t *dest, size_t tv_sec, long tv_nsec)
    char retval[DATE_STRING_BUFFER_SIZE];
    timespec_to_string(retval, tp);
    __gg__convert_encoding(PTRCAST(char, retval),
-                         DEFAULT_CHARMAP_SOURCE,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
    string_to_dest(dest, retval);
    }
@@ -3992,8 +3995,8 @@ gets_int( int ndigits,
    // position (starting at 1) where the problem is.
    int retval = 0;
  
-  int checked_0 = charmap->mapped_character(ascii_0);
-  int checked_9 = charmap->mapped_character(ascii_9);
+  unsigned int checked_0 = charmap->mapped_character(ascii_0);
+  unsigned int checked_9 = charmap->mapped_character(ascii_9);
  
    memset(digits, 0xFF, ndigits * sizeof(int));
    for(int i=1; i<=ndigits; i++)
@@ -4004,7 +4007,7 @@ gets_int( int ndigits,
        retval = -i;
        break;
        }
-    int ch = *p++;
+    unsigned int ch = (unsigned char)*p++;
      if( ch < checked_0 || ch > checked_9 )
        {
        // This isn't a digit zero through nine
@@ -4502,19 +4505,19 @@ gets_nanoseconds( const char *f,
    // positive return value.  A negative return value contains the number of
    // digits we processed,
  
-  int format_s = charmap_format->mapped_character(ascii_s);
-  int source_0 = charmap_source->mapped_character(ascii_0);
-  int source_9 = charmap_source->mapped_character(ascii_9);
+  unsigned int format_s = charmap_format->mapped_character(ascii_s);
+  unsigned int source_0 = charmap_source->mapped_character(ascii_0);
+  unsigned int source_9 = charmap_source->mapped_character(ascii_9);
  
    int errpos = 0;
    int ncount = 0;
    int nanoseconds = 0;
  
    const char *pinit = p;
-  while( f < f_end && *f == format_s && p < pend )
+  while( f < f_end && (unsigned char)*f == format_s && p < pend )
      {
      f += 1;
-    int ch = *p++;
+    unsigned int ch = (unsigned char)*p++;
      errpos += 1;
  
      if( ch < source_0 || ch > source_9 )
@@ -4560,6 +4563,10 @@ fill_cobol_tm(cobol_tm &ctm,
    charmap_t *charmap_format = __gg__get_charmap(par1->encoding);
    charmap_t *charmap_checked = __gg__get_charmap(par2->encoding);
    int checked_space = charmap_checked->mapped_character(ascii_space);
+  int source_plus  = charmap_checked->mapped_character(ascii_plus);
+  int source_minus = charmap_checked->mapped_character(ascii_minus);
+  int source_zero  = charmap_checked->mapped_character(ascii_zero);
+
    int format_space  = charmap_format->mapped_character(ascii_space);
    int format_T      = charmap_format->mapped_character(ascii_T      );
    int format_colon  = charmap_format->mapped_character(ascii_colon  );
@@ -4576,6 +4583,8 @@ fill_cobol_tm(cobol_tm &ctm,
    int format_M      = charmap_format->mapped_character(ascii_M      );
    int format_D      = charmap_format->mapped_character(ascii_D      );
    int format_zero   = charmap_format->mapped_character(ascii_zero   );
+  char decimal_point
+                = charmap_format->mapped_character(__gg__get_decimal_point());
  
    // Let's eliminate trailing spaces...
    trim_trailing_spaces(format, format_end, format_space);
@@ -4584,8 +4593,6 @@ fill_cobol_tm(cobol_tm &ctm,
    bool in_offset = false;
    bool in_nanoseconds = false;
  
-  char decimal_point = __gg__get_decimal_point();
-
    // We keep constant track of the current error location.
    int retval = 1;
    int errpos;
@@ -4596,7 +4603,7 @@ fill_cobol_tm(cobol_tm &ctm,
    int bump;
    while( format < format_end && source < source_end )
      {
-    char ch = *format;
+    unsigned char ch = *format;
  
      if(    ch == format_T
             || ch == format_colon
@@ -4605,7 +4612,7 @@ fill_cobol_tm(cobol_tm &ctm,
        {
        // These are just formatting characters.  They need to be duplicated,
        // but are otherwise ignored.
-      if( *source != ch )
+      if( (unsigned char)*source != ch )
          {
          break;
          }
@@ -4616,31 +4623,31 @@ fill_cobol_tm(cobol_tm &ctm,
      if( ch == format_plus )
        {
        // This flags a following hhmm offset.  It needs to match a '+' or '-'
-      if(    *source != format_plus
-          && *source != format_minus
-          && *source != format_zero)
+      if(    (unsigned char)*source != source_plus
+          && (unsigned char)*source != source_minus
+          && (unsigned char)*source != source_zero)
          {
          break;
          }
-      if( *source == format_zero )
+      if( (unsigned char)*source == format_zero )
          {
          // The next four characters have to be zeroes
-        if( source[1] != format_zero )
+        if( (unsigned char)source[1] != format_zero )
            {
            retval += 1;
            break;
            }
-        if( source[2] != format_zero )
+        if( (unsigned char)source[2] != format_zero )
            {
            retval += 2;
            break;
            }
-        if( source[3] != format_zero )
+        if( (unsigned char)source[3] != format_zero )
            {
            retval += 3;
            break;
            }
-        if( source[4] != format_zero )
+        if( (unsigned char)source[4] != format_zero )
            {
            retval += 4;
            break;
@@ -4691,7 +4698,7 @@ fill_cobol_tm(cobol_tm &ctm,
      if( ch == format_D )
        {
        // We have three possibilities: DDD, DD, and D
-      if( format[1] != format_D )
+      if( (unsigned char)format[1] != format_D )
          {
          // A singleton 'D' is a day-of-week
          errpos = gets_day_of_week(source, source_end, charmap_checked, ctm);
@@ -4702,7 +4709,7 @@ fill_cobol_tm(cobol_tm &ctm,
            }
          bump = 1;
          }
-      else if( format[2] != format_D )
+      else if( (unsigned char)format[2] != format_D )
          {
          // This is DD, for day-of-month
          errpos = gets_day(source, source_end, charmap_checked, ctm);
@@ -4798,7 +4805,8 @@ fill_cobol_tm(cobol_tm &ctm,
      if( ch == format_Z || ch == format_z )
        {
        // This has to be the end of the road
-      if( std::toupper((unsigned char)source[0]) != 'Z' )
+      if(    (unsigned char)source[0] != format_Z
+          && (unsigned char)source[0] != format_z )
          {
          retval += 0;
          break;
@@ -5040,19 +5048,9 @@ __gg__lowest_algebraic( cblc_field_t *dest,
  
  static int
  floating_format_tester( char const * const f,
-                        char const * const f_end,
-                        cbl_encoding_t encoding)
+                        char const * const f_end)
    {
-  charmap_t *charmap = __gg__get_charmap(encoding);
-  int mapped_space = charmap->mapped_character(ascii_space);
-  int mapped_plus  = charmap->mapped_character(ascii_plus);
-  int mapped_minus = charmap->mapped_character(ascii_minus);
-  int mapped_0 = charmap->mapped_character(ascii_0);
-  int mapped_9 = charmap->mapped_character(ascii_9);
-  int mapped_E = charmap->mapped_character(ascii_E);
-  int mapped_e = charmap->mapped_character(ascii_e);
-  int decimal_point = charmap->mapped_character(__gg__get_decimal_point());
-
+  // This routine operates in ASCII space
    int retval = -1;
  
    enum
@@ -5074,23 +5072,23 @@ floating_format_tester( char const * const f,
      switch(state)
        {
        case SPACE1:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            // Just keep looking
            break;
            }
-        if(    ch == mapped_minus
-            || ch == mapped_plus)
+        if(    ch == ascii_minus
+            || ch == ascii_plus)
            {
            state = SPACE2;
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            state = DIGITS1;
            break;
            }
-        if( decimal_point )
+        if( __gg__decimal_point )
            {
            state = DIGITS2;
            break;
@@ -5100,16 +5098,16 @@ floating_format_tester( char const * const f,
          break;
  
        case SPACE2:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            state = DIGITS1;
            break;
            }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
            {
            state = DIGITS2;
            break;
@@ -5118,16 +5116,16 @@ floating_format_tester( char const * const f,
          break;
  
        case DIGITS1:
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            break;
            }
-        if( ch == decimal_point )
+        if( ch == __gg__decimal_point )
            {
            state = DIGITS2;
            break;
            }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            state = SPACE3;
            break;
@@ -5136,16 +5134,16 @@ floating_format_tester( char const * const f,
          break;
  
        case DIGITS2:
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            break;
            }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            state = SPACE3;
            break;
            }
-        if( ch == mapped_E || ch == mapped_e )
+        if( ch == ascii_E || ch == ascii_e )
            {
            state = SPACE4;
            break;
@@ -5154,16 +5152,16 @@ floating_format_tester( char const * const f,
          break;
  
        case SPACE3:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            retval = index;
            break;
            }
-        if( ch == mapped_E || ch == mapped_e )
+        if( ch == ascii_E || ch == ascii_e )
            {
            state = SPACE4;
            break;
@@ -5172,16 +5170,16 @@ floating_format_tester( char const * const f,
          break;
  
        case SPACE4:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
-        if( ch == mapped_minus || ch == mapped_plus )
+        if( ch == ascii_minus || ch == ascii_plus )
            {
            state = SPACE5;
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            state = DIGITS3;
            break;
@@ -5190,11 +5188,11 @@ floating_format_tester( char const * const f,
          break;
  
        case SPACE5:
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            break;
            }
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            state = DIGITS3;
            break;
@@ -5203,11 +5201,11 @@ floating_format_tester( char const * const f,
          break;
  
        case DIGITS3:
-        if( ch >= mapped_0 && ch <= mapped_9 )
+        if( ch >= ascii_0 && ch <= ascii_9 )
            {
            break;
            }
-        if( ch == mapped_space )
+        if( ch == ascii_space )
            {
            state = SPACE6;
            break;
@@ -5216,7 +5214,7 @@ floating_format_tester( char const * const f,
          break;
  
        case SPACE6:
-      if( ch == mapped_space )
+      if( ch == ascii_space )
          {
          break;
          }
@@ -5242,16 +5240,19 @@ __gg__numval_f( cblc_field_t *dest,
                  size_t source_offset,
                  size_t source_size)
    {
+  // It's just easiest for this routine to operate in ASCII space:
+  size_t nbytes;
+  char *converted = __gg__iconverter(source->encoding,
+                                  DEFAULT_SOURCE_ENCODING,
+                                  PTRCAST(char, source->data + source_offset),
+                                  source_size,
+                                  &nbytes);
    GCOB_FP128 value = 0;
-  const char *data     = PTRCAST(char, (source->data + source_offset));
+  const char *data     = converted;
    const char *data_end = data + source_size;
-  charmap_t *charmap = __gg__get_charmap(source->encoding);
-  int mapped_space = charmap->mapped_character(ascii_space);
  
    int error = floating_format_tester( data,
-                                      data_end,
-                                      source->encoding);
-
+                                      data_end);
    if( error || source_size >= 256 )
      {
      exception_raise(ec_argument_function_e);
@@ -5264,12 +5265,13 @@ __gg__numval_f( cblc_field_t *dest,
      while( data < data_end )
        {
        char ch = *data++;
-      if( ch != mapped_space )
+      if( ch != ascii_space )
          {
          *p++ = ch;
          }
        }
      *p++ = '\0';
+    // This next call is why we needed to be in ASCII space.
      value = strtofp128(ach, NULL);
      }
    __gg__float128_to_field(dest,
@@ -5285,13 +5287,18 @@ __gg__test_numval_f(cblc_field_t *dest,
                      size_t source_offset,
                      size_t source_size)
    {
-  const char *data     = PTRCAST(char, (source->data + source_offset));
+  // It's just easiest for this routine to operate in ASCII space:
+  size_t nbytes;
+  char *converted = __gg__iconverter(source->encoding,
+                                  DEFAULT_SOURCE_ENCODING,
+                                  PTRCAST(char, source->data + source_offset),
+                                  source_size,
+                                  &nbytes);
+
+  const char *data     = converted;
    const char *data_end = data + source_size;
-
    int error = floating_format_tester( data,
-                                      data_end,
-                                      source->encoding);
-
+                                      data_end);
    __gg__int128_to_field(dest,
                          error,
                          NO_RDIGITS,
@@ -5314,14 +5321,52 @@ ismatch(const char *a1, const char *a2, const char *b1, const char *b2)
    }
  
  static bool
-iscasematch(const char *a1, const char *a2, const char *b1, const char *b2)
+iscasematch(const char *a1, const char *a2, 
+            const char *b1, const char *b2,
+            bool is_ebcdic)
    {
+  static const unsigned int ebcdic_lower[256] =
+    {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+    };
+
    bool retval = true;
-  while( a1 < a2 && b1 < b2 )
+
+  if( !is_ebcdic )
      {
-    if( std::tolower((unsigned char)*a1++) != std::tolower((unsigned char)*b1++) )
+    while( a1 < a2 && b1 < b2 )
        {
-      retval = false;
+      if( std::tolower((unsigned char)*a1++)
+              != std::tolower((unsigned char)*b1++) )
+        {
+        retval = false;
+        }
+      }
+    }
+  else
+    {
+    while( a1 < a2 && b1 < b2 )
+      {
+      if( ebcdic_lower[(unsigned int)(unsigned char)*a1++]
+                          != ebcdic_lower[(unsigned int)(unsigned char)*b1++] )
+        {
+        retval = false;
+        }
        }
      }
    return retval;
@@ -5353,13 +5398,14 @@ const char *
  strcasestr( const char *haystack,
              const char *haystack_e,
              const char *needle,
-            const char *needle_e)
+            const char *needle_e,
+            bool is_ebcdic)
    {
    const char *retval = NULL;
    const char *pend = haystack_e - (needle_e - needle);
    while( haystack <= pend )
      {
-    if(iscasematch(haystack, haystack_e, needle, needle_e))
+    if(iscasematch(haystack, haystack_e, needle, needle_e, is_ebcdic))
        {
        retval = haystack;
        break;
@@ -5394,13 +5440,14 @@ const char *
  strcaselaststr( const char *haystack,
                  const char *haystack_e,
                  const char *needle,
-                const char *needle_e)
+                const char *needle_e,
+                bool is_ebcdic)
    {
    const char *retval = NULL;
    const char *pend = haystack_e - (needle_e - needle);
    while( haystack <= pend )
      {
-    if(iscasematch(haystack, haystack_e, needle, needle_e))
+    if(iscasematch(haystack, haystack_e, needle, needle_e, is_ebcdic))
        {
        retval = haystack;
        }
@@ -5409,7 +5456,6 @@ strcaselaststr( const char *haystack,
    return retval;
    }
  
-
  extern "C"
  void
  __gg__substitute( cblc_field_t *dest,
@@ -5442,6 +5488,9 @@ __gg__substitute( cblc_field_t *dest,
    const char **pflasts = static_cast<const char **>(malloc(N * sizeof(char *)));
    massert(pflasts);
  
+  const charmap_t *charmap = __gg__get_charmap(arg1_f->encoding);
+  bool is_ebcdic = charmap->is_like_ebcdic();
+
    if( arg1_s == 0 )
      {
      exception_raise(ec_argument_function_e);
@@ -5462,14 +5511,16 @@ __gg__substitute( cblc_field_t *dest,
          pflasts[i] = strcasestr(haystack,
                                  haystack_e,
                                  PTRCAST(char, (arg2_f[i]->data+arg2_o[i])),
-                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i]);
+                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i],
+                                is_ebcdic);
          }
        else if( control[i] & substitute_last_e)
          {
          pflasts[i] = strcaselaststr(haystack,
                                  haystack_e,
                                  PTRCAST(char, (arg2_f[i]->data+arg2_o[i])),
-                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i]);
+                                PTRCAST(char, (arg2_f[i]->data+arg2_o[i])) + arg2_s[i],
+                                is_ebcdic);
          }
        else
          {
@@ -5532,7 +5583,8 @@ __gg__substitute( cblc_field_t *dest,
                                                                   haystack,
                                                                   haystack_e,
                                                                   needle,
-                                                                 needle_e);
+                                                                 needle_e,
+                                                                 is_ebcdic);
          if( !matched )
            {
            matched = !(control[i] & substitute_anycase_e) && ismatch(haystack,
@@ -5622,11 +5674,11 @@ __gg__locale_compare( cblc_field_t *dest,
        }
      }
  
-  __gg__adjust_dest_size(dest, 1);
-  dest->data[0] = *achretval;
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(achretval,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
+  memcpy(dest->data, achretval, strlen(achretval));
+  __gg__adjust_dest_size(dest, strlen(achretval));
    }
  
  extern "C"
@@ -5659,12 +5711,11 @@ __gg__locale_date(cblc_field_t *dest,
      strcpy(ach, nl_langinfo(D_FMT));
      strftime(ach, sizeof(ach), nl_langinfo(D_FMT), &tm);
      }
-
-  __gg__adjust_dest_size(dest, strlen(ach));
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(ach,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_dest_size(dest, strlen(ach));
    }
  
  extern "C"
@@ -5698,11 +5749,11 @@ __gg__locale_time(cblc_field_t *dest,
      strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
      }
  
-  __gg__adjust_dest_size(dest, strlen(ach));
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(ach,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_dest_size(dest, strlen(ach));
    }
  
  extern "C"
@@ -5738,9 +5789,9 @@ __gg__locale_time_from_seconds( cblc_field_t *dest,
      strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
      }
  
-  __gg__adjust_dest_size(dest, strlen(ach));
-  __gg__convert_encoding(PTRCAST(char, dest->data),
-                         DEFAULT_CHARMAP_SOURCE,
+  __gg__convert_encoding(ach,
+                         DEFAULT_SOURCE_ENCODING,
                           dest->encoding);
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_dest_size(dest, strlen(ach));
    }
diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc

index 89153bbcca2f26783feae976c3ff76e2e87fd4f1..f587fbfa06f9ba67a5f28b17b6093b7324295e0a 100644 (file)
--- a/libgcobol/libgcobol.cc
+++ b/libgcobol/libgcobol.cc
@@ -289,6 +289,7 @@ class ec_status_t {
        case file_op_write: return "write";
        case file_op_rewrite: return "rewrite";
        case file_op_delete: return "delete";
+      case file_op_remove: return "remove";
        }
        return "???";
      }
@@ -1627,7 +1628,7 @@ int128_to_field(cblc_field_t   *var,
                                  var->picture);
                size_t outlength;
                const char *converted = __gg__iconverter(
-                                     DEFAULT_CHARMAP_SOURCE,
+                                     DEFAULT_SOURCE_ENCODING,
                                       var->encoding,
                                       PTRCAST(char, location),
                                       var->capacity,
@@ -2755,7 +2756,7 @@ __gg__dirty_to_float( const char *dirty,
    int delta_r = 0;
  
    // We now loop over the remaining input characters:
-  char ch = '\0';
+  unsigned char ch = '\0';
  
    charmap_t *charmap = __gg__get_charmap(field->encoding);
  
@@ -3055,7 +3056,7 @@ format_for_display_internal(char **dest,
          // This buffer is larger than can validly be needed
          unsigned char converted[128];
          size_t outlength;
-        retval = DEFAULT_CHARMAP_SOURCE;
+        retval = DEFAULT_SOURCE_ENCODING;
          const char *mapped = __gg__iconverter(
                                    var->encoding,
                                    retval,
@@ -3285,7 +3286,7 @@ format_for_display_internal(char **dest,
          }
  
        char ach[128];
-      retval = DEFAULT_CHARMAP_SOURCE;
+      retval = DEFAULT_SOURCE_ENCODING;
        charmap_t *charmap = __gg__get_charmap(retval);
  
        __gg__binary_to_string_ascii(ach, digits, value);
@@ -3724,7 +3725,13 @@ get_float128( const cblc_field_t *field,
      {
      if( __gg__decimal_point == '.' )
        {
-      retval = strtofp128(field->initial, NULL);
+      size_t charsout;
+      char *converted = __gg__iconverter(field->encoding,
+                                         DEFAULT_SOURCE_ENCODING,
+                                         field->initial,
+                                         strlen(field->initial),
+                                         &charsout);
+      retval = strtofp128(converted, NULL);
        }
      else
        {
@@ -3954,7 +3961,7 @@ compare_field_class(const cblc_field_t  *conditional,
          walker = right + right_len;
  
          GCOB_FP128 left_value;
-        if( left_flag == 'F' && left[0] == 'Z' )
+        if( left_flag == ascii_F && left[0] == ascii_Z )
            {
            left_value = 0;
            }
@@ -4375,9 +4382,11 @@ __gg__compare_2(cblc_field_t *left_side,
              }
            massert(buffer);
            strcpy(buffer, right_side->initial);
+
            if( __gg__decimal_point == ',' )
              {
-            // We need to replace any commas with periods
+            // We are operating in DECIMAL IS COMMA mode, so we need to
+            // replace any commas with periods.
              char *p = strchr(buffer, ',');
              if(p)
                {
@@ -4385,8 +4394,9 @@ __gg__compare_2(cblc_field_t *left_side,
                }
              }
  
-          // buffer[] now contains the string we want to convert
-
+          // buffer[] now contains the right-side string we want to convert
+          // to one of the floating-point types.  We want them to be the
+          // same size:
            switch(left_side->capacity)
              {
              case 4:
@@ -4970,7 +4980,7 @@ init_var_both(cblc_field_t  *var,
              {
              strcpy(first, walker);
              __gg__convert_encoding( first,
-                                    DEFAULT_CHARMAP_SOURCE,
+                                    DEFAULT_SOURCE_ENCODING,
                                      var->encoding);
              }
            walker += strlen(first) + 1;
@@ -4987,7 +4997,7 @@ init_var_both(cblc_field_t  *var,
            else
              {
             __gg__convert_encoding( last,
-                                   DEFAULT_CHARMAP_SOURCE,
+                                   DEFAULT_SOURCE_ENCODING,
                                     var->encoding);
              }
            walker += strlen(last) + 1;
@@ -6234,7 +6244,7 @@ __gg__move( cblc_field_t        *fdest,
              // ascii:
              size_t charsout;
              const char *converted = __gg__iconverter(fsource->encoding,
-                                                     DEFAULT_CHARMAP_SOURCE,
+                                                     DEFAULT_SOURCE_ENCODING,
                                      PTRCAST(char, fsource->data+source_offset),
                                                       source_size,
                                                       &charsout);
@@ -9142,11 +9152,17 @@ display_both(cblc_field_t  *field,
        }
      }
  
+  size_t conversion_length = strlen(display_string);
+  if( charmap->stride() != 1 )
+    {
+    conversion_length = qual_size;
+    }
+
    size_t outlength;
    const char *converted = __gg__iconverter( encoding,
                                              encout,
                                              display_string,
-                                            strlen(display_string),
+                                            conversion_length,
                                              &outlength);
    write(file_descriptor,
          converted,
@@ -10059,7 +10075,7 @@ is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
        digits_e -= 1;
        unsigned char final_char = (unsigned char)*digits_e;
        final_char = charmap->set_digit_negative(final_char, false);
-      if(   final_char<charmap->mapped_character(ascii_0) 
+      if(   final_char<charmap->mapped_character(ascii_0)
           || final_char>charmap->mapped_character(ascii_9) )
          {
          retval = 0;
@@ -10420,14 +10436,14 @@ accept_envar( cblc_field_t  *tgt,
      // Convert the name to the console codeset:
      __gg__convert_encoding( trimmed_env,
                              encoding,
-                            DEFAULT_CHARMAP_SOURCE);
+                            DEFAULT_SOURCE_ENCODING);
  
      // Pick up the environment variable, and convert it to the internal codeset
      const char *p = getenv(trimmed_env);
      if(p)
        {
        retval = 0; // Okay
-      move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_CHARMAP_SOURCE);
+      move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_SOURCE_ENCODING);
        }
      free(env);
      }
@@ -10638,7 +10654,7 @@ __gg__get_argv( cblc_field_t *dest,
                  dest_offset,
                  dest_length,
                  stashed_argv[N],
-                DEFAULT_CHARMAP_SOURCE);
+                DEFAULT_SOURCE_ENCODING);
      retcode = 0;  // Okay
      }
    return retcode;
@@ -11381,7 +11397,7 @@ __gg__unstring( const cblc_field_t *id1,        // The string being unstring
                      id5_o[nreceiver],
                      id5_s[nreceiver],
                      "",
-                    DEFAULT_CHARMAP_SOURCE);
+                    DEFAULT_SOURCE_ENCODING);
          }
        }
  
@@ -11768,6 +11784,7 @@ __gg__check_fatal_exception()
      case file_op_write:
      case file_op_rewrite:
      case file_op_delete:
+    case file_op_remove:
        break;
      }
    } else {
@@ -12039,6 +12056,23 @@ __gg__adjust_dest_size(cblc_field_t *dest, size_t ncount)
      }
    }
  
+extern "C"
+void
+__gg__adjust_encoding(cblc_field_t *field)
+  {
+  // Assume that field->data is in ASCII;  We need to convert it to the target
+  size_t nbytes;
+  const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+                                           field->encoding,
+                                           PTRCAST(char, field->data),
+                                           field->capacity,
+                                           &nbytes);
+  size_t tocopy = std::min(nbytes, field->allocated);
+  field->capacity = tocopy;
+  memcpy(field->data, converted, tocopy);
+  }
+
+
  extern "C"
  void
  __gg__func_exception_location(cblc_field_t *dest)
@@ -12088,6 +12122,7 @@ __gg__func_exception_location(cblc_field_t *dest)
      }
    __gg__adjust_dest_size(dest, strlen(ach));
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
    }
  
  extern "C"
@@ -12102,6 +12137,7 @@ __gg__func_exception_statement(cblc_field_t *dest)
      }
    __gg__adjust_dest_size(dest, strlen(ach));
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
    }
  
  extern "C"
@@ -12128,6 +12164,7 @@ __gg__func_exception_status(cblc_field_t *dest)
      }
    __gg__adjust_dest_size(dest, strlen(ach));
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
    }
  
  extern "C"
@@ -12195,6 +12232,7 @@ __gg__func_exception_file(cblc_field_t      *dest,
  
    __gg__adjust_dest_size(dest, strlen(ach));
    memcpy(dest->data, ach, strlen(ach));
+  __gg__adjust_encoding(dest);
    }
  
  extern "C"
@@ -12693,7 +12731,7 @@ __gg__just_mangle_name( const cblc_field_t  *field,
    // We need ach_name to be in ASCII:
    size_t charsout;
    const char *converted = __gg__iconverter(field->encoding,
-                                           DEFAULT_CHARMAP_SOURCE,
+                                           DEFAULT_SOURCE_ENCODING,
                                             PTRCAST(char, field->data),
                                             length,
                                             &charsout);
@@ -12784,7 +12822,7 @@ __gg__function_handle_from_name(int                 program_id,
  
    size_t charsout;
    const char *converted = __gg__iconverter(field->encoding,
-                                           DEFAULT_CHARMAP_SOURCE,
+                                           DEFAULT_SOURCE_ENCODING,
                                             PTRCAST(char, field->data + offset),
                                             length,
                                             &charsout);
@@ -13093,6 +13131,7 @@ __gg__deallocate( cblc_field_t *target,
  static int
  get_the_byte(cblc_field_t *field)
    {
+  // This is a helper routine for ALLOCATE
    int retval = -1;
    if( field )
      {
@@ -13100,7 +13139,14 @@ get_the_byte(cblc_field_t *field)
      retval = __gg__fc_char(field);
      if(retval == -1)
        {
-      retval = (int)__gg__get_integer_binary_value(field);
+      retval = (int)(unsigned char)__gg__get_integer_binary_value(field);
+      }
+    else
+      {
+      // This is a bit of a hack.  It turns out the figurative constant is
+      // encoded in ASCII.  We need it to be in the current DISPLAY encoding.
+      charmap_t *charmap = __gg__get_charmap(__gg__display_encoding);
+      retval = charmap->mapped_character(retval);
        }
      }
    return retval;
@@ -13373,6 +13419,7 @@ __gg__module_name(cblc_field_t *dest, module_type_t type)
  
    __gg__adjust_dest_size(dest, strlen(result));
    memcpy(dest->data, result, strlen(result)+1);
+  __gg__adjust_encoding(dest);
    }
  
  /*
@@ -13652,7 +13699,7 @@ __gg__accept_arg_value( cblc_field_t *dest,
                  dest_offset,
                  dest_length,
                  stashed_argv[sv_argument_number],
-                DEFAULT_CHARMAP_SOURCE);
+                DEFAULT_SOURCE_ENCODING);
      retcode = 0;  // Okay
  
      // The Fujitsu spec says bump this value by one.
diff --git a/libgcobol/xmlparse.cc b/libgcobol/xmlparse.cc

index 54b9f02ca37c9f35f8cc3b611486396d39dea6ff..d89d48022c076e641448cc8dd6001c142d34b80b 100644 (file)
--- a/libgcobol/xmlparse.cc
+++ b/libgcobol/xmlparse.cc
@@ -408,6 +408,7 @@ static void fatalError(void * CTX, const char * msg, ...)
  }
  
  #if 0
+
  static xmlEntityPtr getEntity(void * CTX,
                                const xmlChar * name)
  { SAYSO_DATAZ(name); }
@@ -484,6 +485,7 @@ static void setDocumentLocator(void * CTX,
   * xmlCtxtGetStandalone() to get data from the XML declaration.
   */
  static void startDocument(void * CTX)
+
  {
    SAYSO();
  }
@@ -616,7 +618,6 @@ xmlchar_of( const char input[] ) {
  static const char *
  xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
    const char *msg = "???";
-
    switch( erc ) {
    case XML_ERR_OK:
      msg = "Success";
@@ -630,6 +631,7 @@ xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
    case XML_ERR_UNSUPPORTED_ENCODING:
      msg = "Unsupported character encoding";
      break;
+
  #if LIBXML_VERSION >= 21400
    case XML_ERR_RESOURCE_LIMIT:
      msg = "Internal resource limit like maximum amplification factor exceeded";
@@ -710,6 +712,7 @@ static class context_t {
      }
    }
  
+
   protected:
    void init() {
      const char *external_entities = nullptr;
author	Robert Dubner <rdubner@symas.com>
	Thu, 6 Nov 2025 12:26:18 +0000 (07:26 -0500)
committer	Robert Dubner <rdubner@symas.com>
	Thu, 6 Nov 2025 13:57:54 +0000 (08:57 -0500)
gcc/cobol/Make-lang.in		patch \| blob \| blame \| history
gcc/cobol/cdf.y		patch \| blob \| blame \| history
gcc/cobol/cobol1.cc		patch \| blob \| blame \| history
gcc/cobol/genapi.cc		patch \| blob \| blame \| history
gcc/cobol/genapi.h		patch \| blob \| blame \| history
gcc/cobol/genmath.cc		patch \| blob \| blame \| history
gcc/cobol/genutil.cc		patch \| blob \| blame \| history
gcc/cobol/parse.y		patch \| blob \| blame \| history
gcc/cobol/parse_ante.h		patch \| blob \| blame \| history
gcc/cobol/scan.l		patch \| blob \| blame \| history
gcc/cobol/show_parse.h		patch \| blob \| blame \| history
gcc/cobol/symbols.cc		patch \| blob \| blame \| history
gcc/cobol/symbols.h		patch \| blob \| blame \| history
gcc/cobol/util.cc		patch \| blob \| blame \| history
libgcobol/charmaps.cc		patch \| blob \| blame \| history
libgcobol/charmaps.h		patch \| blob \| blame \| history
libgcobol/encodings.h		patch \| blob \| blame \| history
libgcobol/gcobolio.h		patch \| blob \| blame \| history
libgcobol/gfileio.cc		patch \| blob \| blame \| history
libgcobol/intrinsic.cc		patch \| blob \| blame \| history
libgcobol/libgcobol.cc		patch \| blob \| blame \| history
libgcobol/xmlparse.cc		patch \| blob \| blame \| history