]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
cobol: Bring TRIM into accordance with the COBOL specification.
authorRobert Dubner <rdubner@symas.com>
Thu, 4 Jun 2026 17:53:14 +0000 (13:53 -0400)
committerRobert Dubner <rdubner@symas.com>
Thu, 4 Jun 2026 19:10:02 +0000 (15:10 -0400)
The COBOL TRIM function can take optional additional characters to be
trimmed off the ends of a string, rather than just the default, which
trims away spaces.  These changes implement that functionality.

gcc/cobol/ChangeLog:

* genapi.cc (handle_gg_trim): Provide an array of characters to the
library function.
(parser_trim): New function that handles the optional trim chars.
(parser_intrinsic_call_2): Remove __gg__handling from this routine.
* genapi.h (parser_trim): New declaration.
* parse.y: Calls parser_trim() with the list of trim chars.

libgcobol/ChangeLog:

* intrinsic.cc (__gg__formatted_date): Formatting change.
(change_case): Likewise.
(__gg__trim): Rename as __gg__trim_1.
(__gg__trim_1): Reworked to handle an array of single-byte chars.
(__gg__trim_a): Reworked to handle an array of multi-byte chars.
(iscasematch): Formatting change.

gcc/testsuite/ChangeLog:

* cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.cob: New test.
* cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.out: New test.

gcc/cobol/genapi.cc
gcc/cobol/genapi.h
gcc/cobol/parse.y
gcc/testsuite/cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.cob [new file with mode: 0644]
gcc/testsuite/cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.out [new file with mode: 0644]
libgcobol/intrinsic.cc

index ecc37031a00fc3409b6390d73843a368e33598f0..d75d39a76ad1d45fd0542b580fb0cd3011d1c148 100644 (file)
@@ -11394,29 +11394,51 @@ parser_intrinsic_call_1( cbl_field_t *tgt,
 
 static bool
 handle_gg_trim(cbl_field_t *tgt,
-               const char function_name[],
-         const cbl_refer_t& ref1,
-         const cbl_refer_t& ref2 )
+               const cbl_refer_t& input,
+               size_t how,
+               const std::vector<cbl_refer_t>& args )
   {
   bool handled = false;
-  if( strcmp(function_name, "__gg__trim") == 0 )
+  charmap_t *charmap = __gg__get_charmap(input.field->codeset.encoding);
     {
-    charmap_t *charmap = __gg__get_charmap(ref1.field->codeset.encoding);
-    if(charmap->stride() == 1)
+    if(charmap->stride() == 1 && !charmap->is_like_utf8() )
       {
-      uint8_t space = charmap->mapped_character(ascii_space);
-      int how = atoi(ref2.field->data.original());
-      if( how == 0 )
+      size_t array_size = args.size();
+      tree charstype = build_array_type_nelts(UCHAR, array_size);
+      tree chars     = gg_define_variable( charstype,
+                                           NULL,
+                                           vs_stack);
+      TREE_ADDRESSABLE (chars) = 1;
+      tree char_p    = gg_define_variable(UCHAR_P);
+      gg_assign(char_p, gg_pointer_to_array (chars));
+
+      for(const auto& arg : args)
         {
-        how = 3;
+        cbl_figconst_t figconst = static_cast<cbl_figconst_t>
+                                            (arg.field->attr & FIGCONST_MASK);
+        if( figconst )
+          {
+          cbl_char_t figcst = charmap->figconst_character(figconst);
+          tree tfigcst = build_int_cst_type(UCHAR, figcst);
+          gg_assign(gg_indirect(char_p), tfigcst);
+          }
+        else
+          {
+          tree location;
+          get_location(location, arg);
+          gg_assign(gg_indirect(char_p), gg_indirect(location));
+          }
+        gg_increment(char_p);
         }
+
       gg_call(VOID,
               "__gg__trim_1",
               gg_get_address_of(tgt->var_decl_node),
-              gg_get_address_of(ref1.field->var_decl_node),
-              refer_offset(ref1),
-              refer_size_source(ref1),
-              build_int_cst_type(INT, (space<<8) + how),
+              gg_get_address_of(input.field->var_decl_node),
+              refer_offset(input),
+              refer_size_source(input),
+              gg_pointer_to_array(chars),
+              build_int_cst_type(INT, (args.size()<<8) + how),
               NULL_TREE);
       handled = true;
       }
@@ -11424,6 +11446,66 @@ handle_gg_trim(cbl_field_t *tgt,
   return handled;
   }
 
+void
+parser_trim( cbl_field_t *tgt,
+             const cbl_refer_t& input,
+             size_t how,
+             const std::vector<cbl_refer_t>& args )
+  {
+  RETURN_IF_PARSE_ONLY;
+  gcc_assert(how >= 1 && how <= 3);
+  if( !handle_gg_trim(tgt, input, how, args) )
+    {
+    cbl_encoding_t encoding = input.field->codeset.encoding;
+    charmap_t *charmap = __gg__get_charmap(encoding);
+    int stride = charmap->stride();
+    tree tstride = build_int_cst_type(SIZE_T, stride);
+
+    size_t array_size = args.size() * stride;
+    tree charstype = build_array_type_nelts(CHAR, array_size);
+    tree chars     = gg_define_variable( charstype,
+                                         NULL,
+                                         vs_stack);
+    TREE_ADDRESSABLE (chars) = 1;
+    tree char_p    = gg_define_variable(CHAR_P);
+    gg_assign(char_p, gg_pointer_to_array (chars));
+
+    for(const auto& arg : args)
+      {
+      cbl_figconst_t figconst = static_cast<cbl_figconst_t>
+                                          (arg.field->attr & FIGCONST_MASK);
+      if( figconst )
+        {
+        cbl_char_t figcst = charmap->figconst_character(figconst);
+        tree tfigcst = build_int_cst_type(ULONG, figcst);
+
+        gg_memcpy(char_p,
+                  gg_get_address_of(tfigcst),
+                  tstride );
+        }
+      else
+        {
+        tree location;
+        get_location(location, arg);
+        gg_memcpy(char_p,
+                  location,
+                  tstride);
+        }
+      gg_assign(char_p, gg_add(char_p, tstride));
+      }
+    gg_call(VOID,
+            "__gg__trim_a",
+            gg_get_address_of(tgt->var_decl_node),
+            gg_get_address_of(input.field->var_decl_node),
+            refer_offset(input),
+            refer_size_source(input),
+            gg_pointer_to_array (chars),
+            build_int_cst_type(SIZE_T, array_size),
+            build_int_cst_type(INT, how),
+            NULL_TREE);
+    }
+  }
+
 void
 parser_intrinsic_call_2( cbl_field_t *tgt,
                        const char function_name[],
@@ -11451,24 +11533,18 @@ parser_intrinsic_call_2( cbl_field_t *tgt,
     }
   store_location_stuff(function_name);
 
-  if( handle_gg_trim(tgt, function_name, ref1, ref2) )
-    {
-    // The specialty routine did the job
-    }
-  else
-    {
-    gg_call(VOID,
-            function_name,
-            gg_get_address_of(tgt->var_decl_node),
-            gg_get_address_of(ref1.field->var_decl_node),
-            refer_offset(ref1),
-            refer_size_source(ref1),
-            ref2.field ? gg_get_address_of(ref2.field->var_decl_node)
-                       : null_pointer_node,
-            refer_offset(ref2),
-            refer_size_source(ref2),
-            NULL_TREE);
-    }
+  gg_call(VOID,
+          function_name,
+          gg_get_address_of(tgt->var_decl_node),
+          gg_get_address_of(ref1.field->var_decl_node),
+          refer_offset(ref1),
+          refer_size_source(ref1),
+          ref2.field ? gg_get_address_of(ref2.field->var_decl_node)
+                     : null_pointer_node,
+          refer_offset(ref2),
+          refer_size_source(ref2),
+          NULL_TREE);
+
   TRACE1
     {
     TRACE1_INDENT
index b4761c3bf98f0947733d554bfdee9e30d2feb906..479ecc22ce347347caa960b0de3f07f784241b0e 100644 (file)
@@ -618,4 +618,7 @@ void parser_statement_end( const std::list<cbl_field_t*>& );
 tree parser_compile_ecs( const std::vector<uint64_t>& ecs );
 tree parser_compile_dcls( const std::vector<uint64_t>& dcls );
 
+void parser_trim( cbl_field_t *tgt, const cbl_refer_t& input,
+                  size_t how, const std::vector<cbl_refer_t>& args );
+
 #endif
index 804bf28363f96b593f902a7c56001ac600421b71..dfd5bbf0a903aa8d651ab41407da21bb725aae4f 100644 (file)
@@ -749,7 +749,7 @@ class locale_tgt_t {
                        perform_inline perform_except
 
 %type   <refer>         eval_subject1
-%type   <vargs>         vargs disp_vargs
+%type   <vargs>         vargs disp_vargs trim_expr
 %type   <field>         level_name
 %type   <number>        fd_name
 %type   <string>        picture_sym name66 paragraph_name
@@ -783,7 +783,7 @@ class locale_tgt_t {
 %type   <refer>         move_tgt selected_name read_key read_into vary_by
 %type   <refer>         num_operand envar search_expr any_arg
 %type   <accept_func>  accept_body
-%type   <refers>        subscript_exprs subscripts arg_list free_tgts
+%type   <refers>        subscript_exprs subscripts arg_list free_tgts 
 %type   <targets>       move_tgts set_tgts
 %type   <field>         search_varying
 %type   <field>         search_term search_terms
@@ -820,8 +820,8 @@ class locale_tgt_t {
 %type   <number>        intrinsic_v intrinsic_I intrinsic_N intrinsic_X
 %type   <number>        intrinsic_I2 intrinsic_N2 intrinsic_X2
 %type   <number>        lopper_case 
-%type   <number>        return_body return_file
-%type   <field>         trim_trailing function_udf
+%type   <number>        return_body return_file trim_trailing 
+%type   <field>         function_udf
 
 %type   <refer>         str_input str_size
 %type   <refer2>        str_into
@@ -11469,7 +11469,11 @@ intrinsic:      function_udf
                   error_msg(@error, "invalid TRIM argument");
                   YYERROR;
                 }
-        |       TRIM '(' expr[r1] trim_trailing ')'
+                /*
+                 * TRIM (arg-1 arg-2a arg-2b) is the same as 
+                 * TRIM (TRIM (arg-1 arg-2a) arg-2b).
+                 */
+        |       TRIM '(' expr[r1] trim_trailing[how] trim_expr[args2] ')'
                 {
                   location_set(@1);
                    switch( $r1->field->type ) {
@@ -11482,6 +11486,8 @@ intrinsic:      function_udf
                    default:
                      // BLANK WHEN ZERO implies numeric-edited, so OK
                      if( $r1->field->has_attr(blank_zero_e) ) {
+                       dbgmsg("logic error: must be numeric-edited");
+                       gcc_unreachable();
                        break;
                      }
                      error_msg(@r1, "TRIM argument must be alphanumeric");
@@ -11489,9 +11495,10 @@ intrinsic:      function_udf
                      break;
                   }
                   $$ = new_alphanumeric("TRIM", $r1->field->codeset.encoding);
-                  cbl_refer_t * how = new_reference($trim_trailing);
-                  if( ! intrinsic_call_2($$, TRIM, $r1, how) ) YYERROR;
-                }
+                  std::vector<cbl_refer_t> args($args2->args.begin(),
+                                                $args2->args.end());
+                  parser_trim($$, *$r1, $how, args);
+                }  
 
         |       USUBSTR '(' alpha_val[r1] expr[r2] expr[r3]  ')' {
                   location_set(@1);
@@ -11811,11 +11818,37 @@ lopper_case:    LOWER_CASE      { $$ = LOWER_CASE; }
         |       UPPER_CASE      { $$ = UPPER_CASE; }
                 ;
 
-trim_trailing:  %empty          { $$ = new_constant("0"); }  // Remove both
-        |       LEADING         { $$ = new_constant("1"); }  // Remove leading  spaces
-        |       TRAILING        { $$ = new_constant("2"); }  // Remove trailing spaces
+trim_trailing:  %empty          { $$ = 3; }  // Remove both
+        |       LEADING         { $$ = 1; }  // Remove leading  spaces
+        |       TRAILING        { $$ = 2; }  // Remove trailing spaces
         ;
 
+trim_expr:      %empty {
+                  cbl_field_t *space = constant_of(constant_index(SPACES));
+                  $$ = new vargs_t( new_reference(space) );
+                }
+        |       vargs {
+                  $$ = new vargs_t;
+                  std::copy_if( $vargs->args.begin(),
+                                $vargs->args.end(),
+                                std::back_inserter($$->args), 
+                                []( const auto& arg ) {
+                     bool is_alpha =
+                       arg.field->type == FldAlphanumeric ||
+                       arg.field->type == FldLiteralA;
+                    if( arg.addr_of || ! is_alpha ) {
+                      error_msg(arg.loc, "invalid TRIM character");
+                      return false;
+                    }
+                    if( arg.field->char_capacity() != 1 ) {
+                      error_msg(arg.loc, "TRIM argument may be only 1 character");
+                      return false;
+                    }
+                    return true;
+                  } );
+                }
+                ;
+
 intrinsic0:     CURRENT_DATE {
                   location_set(@1);
                   $$ = new_alphanumeric("CURRENT-DATE");
diff --git a/gcc/testsuite/cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.cob b/gcc/testsuite/cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.cob
new file mode 100644 (file)
index 0000000..5cca2ce
--- /dev/null
@@ -0,0 +1,22 @@
+       *> { dg-do run }
+       *> { dg-output-file "group2/FUNCTION_TRIM_with_extra_characters.out" }
+        identification              division.
+        program-id.                 prog.
+        data                        division.
+        working-storage             section.
+        77 foo pic x(24).
+        77 b pic x value 'b'.
+        procedure                   division.
+            move '  """000Bobaaabbbccc     ' to foo
+            display ''''
+                 function trim(foo space quote zero 'c' b 'a')
+                    ''''
+            display ''''
+                 function trim(foo leading space quote zero 'c' b 'a')
+                    ''''
+            display ''''
+                 function trim(foo trailing space quote zero 'c' b 'a')
+                    ''''
+            goback.
+        end program                 prog.
+
diff --git a/gcc/testsuite/cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.out b/gcc/testsuite/cobol.dg/group2/FUNCTION_TRIM_with_extra_characters.out
new file mode 100644 (file)
index 0000000..407c535
--- /dev/null
@@ -0,0 +1,4 @@
+'Bob'
+'Bobaaabbbccc    '
+'  """000Bob'
+
index dce815acc85526db0ca0978dc9adcc28f91a2868..b62518f5b47705b584106ab55588aade0c17fc06 100644 (file)
@@ -1607,7 +1607,7 @@ __gg__formatted_date(cblc_field_t *dest, // Destination string
                      size_t arg1_offset,
                      size_t arg1_size,
                const cblc_field_t *arg2, // integer date
-                     size_t arg2_offset,  
+                     size_t arg2_offset,
                      size_t arg2_size)
   {
   // FUNCTION FORMATTED-DATE
@@ -2117,7 +2117,7 @@ change_case( cblc_field_t *dest,
                                            &converted_bytes);
   // Make a copy of it to prevent the static nature of iconverter from causing
   // trouble:
-  cbl_char_t *duped = 
+  cbl_char_t *duped =
           static_cast<cbl_char_t *>(__gg__memdup(converted, converted_bytes));
   cbl_char_t *pend = duped + converted_bytes / width_of_utf32;
 
@@ -3509,78 +3509,131 @@ __gg__rem(cblc_field_t *dest,
 
 extern "C"
 void
-__gg__trim( cblc_field_t *dest,
-      const cblc_field_t *arg1,
+__gg__trim_1( cblc_field_t *dest,
+        const cblc_field_t *src,
+              size_t        src_offset,
+              size_t        src_size,
+              unsigned char *chars,
+              int           count_how ) // (args.size()<<8) + how),
+  {
+  /* This routine is called for ASCII/EBCDIC single-byte-code values. Since
+     we know that, we can use the fast std::find_if() routine.  */
+
+  #define LEADING  1  // Remove leading  spaces
+  #define TRAILING 2  // Remove trailing spaces
+
+  const uint8_t *left  = src->data + src_offset;    // Leftmost  character
+  const uint8_t *right = left      + src_size;      // One past the end
+
+  int count = count_how >> 8;
+  for(int i=0; i<count; i++)
+    {
+    uint8_t trimch = chars[i];
+
+    if ((count_how & LEADING) && left < right)
+      {
+      left = std::find_if(left,
+                          right,
+                          [trimch](uint8_t c){return c != trimch;});
+      }
+
+    if( (count_how & TRAILING) && left < right)
+      {
+      right = std::find_if(
+          std::make_reverse_iterator(right),
+          std::make_reverse_iterator(left),
+          [trimch](uint8_t c) {return c != trimch;}
+      ).base();
+      }
+    }
+
+  size_t bytes_converted = right - left;
+  __gg__adjust_dest_size(dest, bytes_converted);
+
+  memcpy(dest->data,
+         left,
+         bytes_converted);
+
+  return;
+  }
+
+extern "C"
+void
+__gg__trim_a( cblc_field_t *dest,
+      const cblc_field_t *arg1,        // This is the string to be trimmed
             size_t        arg1_offset,
             size_t        arg1_size,
-      const cblc_field_t *arg2,
-            size_t        arg2_offset,
-            size_t        arg2_size)
+            char         *arg2,        // This is the string of characters
+            size_t        arg2_size,   // to be removed
+            int type)
   {
+  #define LEADING  1  // Remove leading  characters
+  #define TRAILING 2  // Remove trailing trailing characters
+
   // We assume that dest is an intermediate_e with the same encoding as arg1.
-  assert(     dest->type == FldAlphanumeric 
+  assert(     dest->type == FldAlphanumeric
           && (dest->attr & intermediate_e)
           &&  dest->encoding == arg1->encoding );
-  charmap_t *charmap = __gg__get_charmap(arg1->encoding);
+  const charmap_t *charmap = __gg__get_charmap(arg1->encoding);
   int stride = charmap->stride();
-  cbl_char_t mapped_space = charmap->mapped_character(ascii_space);
-
-  int rdigits;
-  __int128 type = __gg__binary_value_from_qualified_field(&rdigits,
-                                                          arg2,
-                                                          arg2_offset,
-                                                          arg2_size);
-  #define LEADING  1  // Remove leading  spaces
-  #define TRAILING 2  // Remove trailing spaces
 
+  char *strippers = arg2;
+  const char *strip_end = arg2 + arg2_size;
   char *left  = reinterpret_cast<char *>(arg1->data) + arg1_offset;
   char *right = left + arg1_size-stride; // Points AT the character, not beyond
-  switch(type)
+
+  while( strippers < strip_end )
     {
-    case 0: // Strip off leading and trailing spaces
-      while(left <= right)
-        {
-        if( charmap->getch(left, (size_t)0) != mapped_space )
+    cbl_char_t stripper = charmap->getch(strippers, (size_t)0);
+
+    switch(type)
+      {
+      case 3: // Strip off leading and trailing spaces
+        while(left <= right)
           {
-          break;
+          if( charmap->getch(left, (size_t)0) != stripper )
+            {
+            break;
+            }
+          left += stride;
           }
-        left += stride;
-        }
-      while(left <= right)
-        {
-        if( charmap->getch(right, (size_t)0) != mapped_space )
+        while(left <= right)
           {
-          break;
+          if( charmap->getch(right, (size_t)0) != stripper )
+            {
+            break;
+            }
+          right -= stride;
           }
-        right -= stride;
-        }
-      break;
-    
-    case LEADING: // Just leading
-      {
-      while(left <= right)
+        break;
+
+      case LEADING: // Just leading
         {
-        if( charmap->getch(left,  (size_t)0) != mapped_space )
+        while(left <= right)
           {
-          break;
+          if( charmap->getch(left,  (size_t)0) != stripper )
+            {
+            break;
+            }
+          left += stride;
           }
-        left += stride;
+        break;
         }
-      break;
-      }
 
-    case TRAILING: // Just trailing
-      {
-      while(left <= right)
+      case TRAILING: // Just trailing
         {
-        if( charmap->getch(right,  (size_t)0) != mapped_space )
+        while(left <= right)
           {
-          break;
+          if( charmap->getch(right,  (size_t)0) != stripper )
+            {
+            break;
+            }
+          right -= stride;
           }
-        right -= stride;
+        break;
         }
-      break;
       }
+    strippers += stride;
     }
   size_t ncount = right+stride - left;
   __gg__adjust_dest_size(dest, ncount);
@@ -5353,7 +5406,7 @@ ismatch(const char *a1, const char *a2, const char *b1, const char *b2)
   }
 
 static bool
-iscasematch(const char *a1, const char *a2, 
+iscasematch(const char *a1, const char *a2,
             const char *b1, const char *b2,
             bool is_ebcdic)
   {
@@ -5841,55 +5894,3 @@ __gg__locale_time_from_seconds( cblc_field_t *dest,
   }
 
 
-extern "C"
-void
-__gg__trim_1( cblc_field_t *dest,
-        const cblc_field_t *src,
-              size_t        src_offset,
-              size_t        src_size,
-              int           space_how ) // (space<<8) + how
-  {
-  // This is the no-holds-barred, do-it-as-fast-as-possible, TRIM routine. It
-  // gets called only when the stride is 1.  Spaces is eight bytes of the
-  // character to be trimmed away, usually 0x2020202020202020 because we are
-  // usually working in ASCII.  'how' indicates LEADING and TRAILING.
-
-  #define LEADING  1  // Remove leading  spaces
-  #define TRAILING 2  // Remove trailing spaces
-
-  const uint8_t *left  = src->data + src_offset;    // Leftmost  character
-  const uint8_t *right = left      + src_size;      // One past the end
-
-  uint8_t space = space_how >> 8;
-
-  if ((space_how & LEADING) && left < right)
-    {
-    left = std::find_if(left, right, [space](uint8_t c)
-      {
-          return c != space;
-      });
-    }
-
-  if( (space_how & TRAILING) && left < right)
-    {
-    right = std::find_if(
-        std::make_reverse_iterator(right),
-        std::make_reverse_iterator(left),
-        [space](uint8_t c) { return c != space; }
-    ).base();
-    }
-
-  size_t bytes_converted = right - left;
-  __gg__adjust_dest_size(dest, bytes_converted);
-
-#if 0
-  __gg__field_from_string(dest, 0, dest->capacity, reinterpret_cast<const char *>(left), bytes_converted);
-#else
-  memcpy(dest->data,
-         left,
-         bytes_converted);
-#endif
-
-  return;
-  }
-