cobol_warning(SynFileCodeSet, file_code_set, warning_as_error);
return true;
+ case OPT_Wrecording_mode:
+ cobol_warning(SynRecordingMode, recording_mode, warning_as_error);
+ return true;
+
case OPT_Wset_locale_to:
cobol_warning(SynSetLocaleTo, set_locale_to, warning_as_error);
return true;
<number>
.It
.Sy VOLATILE
+.It
+Per-program Registers
+.Bl -tag -compact -width XML-NNAMESPACE-PREFIX
+.\" .It Sy JSON-CODE
+.\" S9(9)
+.\" .It Sy JSON-STATUS
+.\" S9(9)
+.It Sy RETURN-CODE
+S9(4)
+.It Sy SORT-CONTROL
+X(160)
+.It Sy SORT-CORE-SIZE
+S9(8)
+.It Sy SORT-FILE-SIZE
+S9(8)
+.It Sy SORT-MESSAGE
+X(8)
+.It Sy SORT-MODE-SIZE
+S9(5)
+.It Sy SORT-RETURN
+S9(4)
+.It Sy TALLY
+9(5)
+.It Sy WHEN-COMPILED
+X(16)
+.It Sy XML-CODE
+S9(9)
+.It Sy XML-EVENT
+X(30)
+.It Sy XML-INFORMATION
+S9(9)
+.It Sy XML-NAMESPACE
+X(0) to X(32,768)
+.It Sy XML-NNAMESPACE
+N(0) to N(16,384)
+.It Sy XML-NAMESPACE-PREFIX
+X(0) to X(4,096)
+.It Sy XML-NNAMESPACE-PREFIX
+N(0) to N(2,048)
+.It Sy XML-NTEXT
+N(0) to N(2,000,000)
+.It Sy XML-TEXT
+X(0) to X(2,147,483,646)
+.El
.El
.It gnu
to indicate GnuCOBOL syntax, generally compatible with MicroFocus.
static tree label_list_back_goto;
static tree label_list_back_label;
+#ifdef ENABLE_HIJACKING
+#pragma message "HIJACKING IS ENABLED - It should be disabled for release"
+static bool hijacked = false; // Indicates a DUBNER hijacking is in progress.
static void hijack_for_development(const char *funcname);
+static void hijacker();
+#define RETURN_WHEN_HIJACKED do{if(hijacked){return;}}while(0);
+#else
+#define RETURN_WHEN_HIJACKED
+#define hijacked (false)
+#endif
static size_t sv_data_name_counter = 1;
{
gg_call(VOID,
"__gg__set_exception_environment",
- ecs ? gg_get_address_of(ecs) : null_pointer_node,
- dcls ? gg_get_address_of(dcls) : null_pointer_node,
+ ecs ? gg_pointer_to_array(ecs) : null_pointer_node,
+ dcls ? gg_pointer_to_array(dcls) : null_pointer_node,
NULL_TREE);
}
TRACE1_TEXT(psz);
free(psz);
}
-
+
gg_free(member(field->var_decl_node, "data"));
// Flag this guy as free:
gg_assign(member(field->var_decl_node, "data"), gg_cast(UCHAR_P, null_pointer_node));
free(psz2);
// Needed so that GDB-COBOL can trap at a section name.
insert_nop(101);
+
+ // Go see if there was an ALTER statement targeting this procedure
+ gg_append_statement(procedure->alter_switch_goto);
+ // Lay down the label we will return to if there is no ALTER in play
+ gg_append_statement(procedure->no_alter_label);
}
static void
// Yes, trying to understand this causes headaches for many people who read
// this. Take an aspirin.
insert_nop(102);
+
+ // Go see if there was an ALTER statement targeting this procedure
+ gg_append_statement(procedure->alter_switch_goto);
+ // Lay down the label we will return to if there is no ALTER in play
+ gg_append_statement(procedure->no_alter_label);
}
static void
-pseudo_return_push(cbl_proc_t *procedure, tree return_addr)
+pseudo_return_push(cbl_proc_t *procedure, size_t index)
{
// Put the return address onto the stack:
//gg_suppress_location(true);
TRACE1
{
TRACE1_HEADER
- gg_printf("%s %p %p",
+ gg_printf("%s %p %ld",
gg_string_literal(procedure->label->name),
gg_cast(SIZE_T, procedure->exit.addr),
- return_addr,
+ build_int_cst_type(SIZE_T, index),
NULL_TREE);
TRACE1_END
}
gg_call(VOID,
"__gg__pseudo_return_push",
procedure->exit.addr,
- return_addr,
+ build_int_cst_type(SIZE_T, index),
NULL_TREE);
-
- //gg_suppress_location(false);
}
static void
pseudo_return_pop(cbl_proc_t *procedure)
{
- //gg_suppress_location(true);
-
TRACE1
{
TRACE1_HEADER
TRACE1
{
TRACE1_TEXT("Returning")
+ TRACE1_END
}
// The top of the stack is us!
- // Pick up the return address from the pseudo_return stack:
+ // Pick up the return index from the pseudo_return stack:
token_location_override(current_location_minus_one());
- gg_assign(current_function->void_star_temp,
- gg_call_expr( VOID_P,
- "__gg__pseudo_return_pop",
- NULL_TREE));
+
// And do the return:
token_location_override(current_location_minus_one());
- gg_goto(current_function->void_star_temp);
+ gg_append_statement(procedure->dispatch_switch_goto);
}
ELSE
{
{
TRACE1_END
}
- //gg_suppress_location(false);
}
static void
if( !retval )
{
- static int counter=1;
-
// This is a new section or paragraph; we need to create its values:
- retval = static_cast<struct cbl_proc_t *>
- (xmalloc(sizeof(struct cbl_proc_t)));
+ //retval = static_cast<struct cbl_proc_t *>(xmalloc(sizeof(struct cbl_proc_t)));
+ retval = new struct cbl_proc_t;
gcc_assert(retval);
retval->label = label;
&retval->top.decl);
gg_create_goto_pair(&retval->exit.go_to,
&retval->exit.label,
- &retval->exit.addr
- );
+ &retval->exit.addr);
gg_create_goto_pair(&retval->bottom.go_to,
&retval->bottom.label,
- &retval->bottom.addr
- );
+ &retval->bottom.addr);
- // fprintf(stderr, "NewProcedure: (%p) %s %p %p %p %p %p %p\n",
- // retval,
- // retval->name,
- // retval->top.go_to,
- // retval->top.label,
- // retval->exit.go_to,
- // retval->exit.label,
- // retval->bottom.go_to,
- // retval->bottom.label);
-
- // If this procedure is a paragraph, and it becomes the target of
- // an ALTER statement, alter_location will be used to make that change
- char *psz = xasprintf("_%s_alter_loc_%d", label->name, counter);
- retval->alter_location = gg_define_void_star(psz, vs_static);
- free(psz);
- DECL_INITIAL(retval->alter_location) = null_pointer_node;
+ // We need a goto/label pair for the location of the dispatch switch for
+ // this paragraph:
+ gg_create_goto_pair(&retval->dispatch_switch_goto,
+ &retval->dispatch_switch_label);
+
+ // We need goto/label pairs for the location of the dispatch switch for
+ // any potential ALTER to this paragraph
+ gg_create_goto_pair(&retval->alter_switch_goto,
+ &retval->alter_switch_label);
+ gg_create_goto_pair(&retval->no_alter_goto,
+ &retval->no_alter_label);
- counter +=1 ;
+ // We can now add this procedure to the of paragraphs that might be
+ // performed:
+ current_function->list_of_procedures.push_back(retval);
+
+ // When this paragraph becomes the target of an ALTER statement, the index
+ // that will be used in the switch() statement goes here:
+ retval->alter_index = gg_define_variable(SIZE_T, NULL, vs_static, 0);
label->structs.proc = retval;
}
parser_enter_section(cbl_label_t *label)
{
Analyze();
+
+ RETURN_WHEN_HIJACKED;
+
// Do the leaving before the SHOW_PARSE; it makes the output more sensible
// A new section ends the current paragraph:
leave_paragraph_internal();
parser_enter_paragraph(cbl_label_t *label)
{
Analyze();
+
+ RETURN_WHEN_HIJACKED;
+
// Do the leaving before the SHOW_PARSE; the output makes more sense that way
// A new paragraph ends the current paragraph:
leave_paragraph_internal();
CHECK_LABEL(label);
struct cbl_proc_t *procedure = find_procedure(label);
+
gg_append_statement(procedure->top.label);
paragraph_label(procedure);
current_function->current_paragraph = procedure;
struct cbl_proc_t *altered_proc = find_procedure(altered);
struct cbl_proc_t *proceed_to_proc = find_procedure(proceed_to);
- gg_assign( altered_proc->alter_location,
- proceed_to_proc->top.addr);
+ // We add one to the size of the alter_decls list, because we use zero to
+ // indicate that alter_index hasn't been changed.
+ gg_assign(altered_proc->alter_index,
+ build_int_cst_type(SIZE_T,
+ altered_proc->alter_decls.size()+1));
+ altered_proc->alter_decls.push_back(proceed_to_proc->top.addr);
}
void
parser_goto( cbl_refer_t value_ref, size_t narg, cbl_label_t * const labels[] )
- {
+ // This routine takes
+{
// This is part of the Terrible Trio of parser_perform, parser_goto and
// parser_enter_[procedure]. parser_goto has an easier time of it than
// the other two, because it just has to jump from here to the entry point
gcc_assert(narg >= 1);
- // This is a computed GOTO. It might have only one element, which is
- // an ordinary GOTO without a DEPENDING ON clause. We create that table
- // anyway, because in the case of an ALTER statement, we will be replacing
- // that sole element with the PROCEED TO element.
-
- // We need to create a static array of pointers to locations:
- static int comp_gotos = 1;
- char *psz = xasprintf("_comp_goto_%d", comp_gotos++);
- tree array_of_pointers_type = build_array_type_nelts(VOID_P, narg);
- tree array_of_pointers = gg_define_variable(array_of_pointers_type, psz, vs_static);
- free(psz);
-
- // We have the array. Now we need to build the constructor for it
- tree constr = make_node(CONSTRUCTOR);
- TREE_TYPE(constr) = array_of_pointers_type;
- TREE_STATIC(constr) = 1;
- TREE_CONSTANT(constr) = 1;
-
- for(size_t i=0; i<narg; i++)
+ if( narg == 1 )
{
- CHECK_LABEL(labels[i]);
- struct cbl_proc_t *procedure = find_procedure(labels[i]);
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- build_int_cst_type(SIZE_T, i),
- procedure->top.addr );
+ // This is the simplest possible case -- no DEPENDING ON clause.
+ struct cbl_proc_t *procedure = find_procedure(labels[0]);
+ gg_append_statement(procedure->top.go_to);
}
- DECL_INITIAL(array_of_pointers) = constr;
-
- // We need to pick up the value argument as an INT:
- tree value = gg_define_int();
-
- if( value_ref.field )
+ else
{
+ // We will implement the two or more fanout with a switch statement.
+
+ tree value = gg_define_int();
get_binary_value( value,
NULL,
value_ref.field,
refer_offset(value_ref));
- // Convert it from one-based to zero-based:
- gg_decrement(value);
- // Check to see if the value is in the range 0...narg-1:
- IF( value, ge_op, integer_zero_node)
- {
- IF( value, lt_op, build_int_cst_type(INT, narg) )
- {
- // It is in the valid range, so we can do the goto:
- Analyzer.ExitMessage();
- gg_goto(gg_array_value(array_of_pointers, value));
- }
- ELSE
- {
- // Otherwise, just fall through
- }
- ENDIF
- }
- ELSE
- ENDIF
- }
- else
- {
- // This is a simple GOTO. Because it is a simple GO TO, there is the
- // possibility that this paragraph was the target of an ALTER statement.
- IF( current_function->current_paragraph->alter_location, ne_op, null_pointer_node )
- {
- // Somebody did an ALTER statement before we got here
- gg_assign(current_function->void_star_temp, current_function->current_paragraph->alter_location);
- }
- ELSE
- {
- // This paragraph wasn't the target of an ALTER:
- gg_assign(current_function->void_star_temp, gg_array_value(array_of_pointers, 0));
- }
- ENDIF
- Analyzer.ExitMessage();
- gg_goto(current_function->void_star_temp);
- }
- return;
- }
-
-void
-parser_perform(cbl_label_t *label, bool suppress_nexting)
- {
- Analyze();
- SHOW_PARSE
- {
- SHOW_PARSE_HEADER
- SHOW_PARSE_LABEL(" ", label)
- char ach[32];
- sprintf(ach, " label is at %p", static_cast<void*>(label));
- SHOW_PARSE_TEXT(ach)
- if( label )
- {
- sprintf(ach,
- " label->proc is %p",
- static_cast<void*>(label->structs.proc));
- }
- SHOW_PARSE_TEXT(ach)
- SHOW_PARSE_END
- }
-
- TRACE1
- {
- TRACE1_HEADER
- TRACE1_LABEL("", label, "")
- TRACE1_END
- }
- CHECK_LABEL(label);
- label->used = yylineno;
+ // value is properly 1 through nargs
- struct cbl_proc_t *procedure = find_procedure(label);
+ tree switch_statement_list = make_node(STATEMENT_LIST);
+ TREE_TYPE(switch_statement_list) = void_type_node;
- // We need to create the unnamed return address that we
- // will instantiate right after the goto:
- tree return_address_decl = build_decl( UNKNOWN_LOCATION,
- LABEL_DECL,
- NULL_TREE,
- void_type_node);
- DECL_CONTEXT(return_address_decl) = current_function->function_decl;
- TREE_USED(return_address_decl) = 1;
-
- tree return_label_expr = build1(LABEL_EXPR,
- void_type_node,
- return_address_decl);
- tree return_addr = gg_get_address_of(return_address_decl);
-
-// cbl_parser_mod *parser_mod = new cbl_parser_mod;
+ tree switchexpr = build2(SWITCH_EXPR,
+ integer_type_node,
+ value,
+ switch_statement_list);
+ gg_append_statement(switchexpr);
+ current_function->statement_list_stack.push_back(switch_statement_list);
- // Put the return address onto the pseudo-return stack
- pseudo_return_push(procedure, return_addr);
+ tree caselabel;
+ tree labeldecl;
- // Create the code that will launch the paragraph
- // The following comment is, believe it or not, necessary. The insertion
- // includes a line number insertion that's needed because when the goto/label
- // pairs were created, the locations of the goto instruction and the label
- // were not known.
-
- const char *para_name = nullptr;
- const char *sect_name = nullptr;
- const char *program_name = current_function->our_unmangled_name;
- size_t deconflictor = symbol_label_id(label);
-
- char ach[256];
- if( label->type == LblParagraph )
- {
- const cbl_label_t *sec_label = cbl_label_of(symbol_at(label->parent));
- para_name = label->name;
- sect_name = sec_label->name;
- sprintf(ach,
- "%s PERFORM %s of %s of %s (" HOST_SIZE_T_PRINT_DEC ")",
- ASM_COMMENT_START,
- para_name,
- sect_name,
- program_name,
- (fmt_size_t)deconflictor);
+ for(size_t i = 0; i < narg; ++i)
+ {
+ tree val = build_int_cst(INT, i+1);
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+ caselabel = build_case_label(val,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
- gg_insert_into_assembler(ach);
- }
- else
- {
- sect_name = label->name;
- sprintf(ach,
- "%s PERFORM %s of %s (" HOST_SIZE_T_PRINT_DEC ")",
- ASM_COMMENT_START,
- sect_name,
- program_name,
- (fmt_size_t)deconflictor);
- gg_insert_into_assembler(ach);
- }
+ struct cbl_proc_t *procedure = find_procedure(labels[i]);
+ gg_append_statement(procedure->top.go_to);
+ }
- if( !suppress_nexting )
- {
- // Flag this source-code line as being a PERFORM statement.
- perform_is_armed = CURRENT_LINE_NUMBER ;
- }
+ // Finish with a default case that just falls through
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
- // We do the indirect jump in order to prevent the compiler from complaining
- // in the case where we are performing a USE GLOBAL DECLARATIVE. Without the
- // indirection, the compiler isn't able to handle the case where we are
- // jumping to a location in our parent program-id; it can't find a matching
- // local symbol, and crashes.
- gg_goto(procedure->top.addr);
+ caselabel = build_case_label(NULL_TREE,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
- // And create the return address label:
- gg_append_statement(return_label_expr);
- TRACE1
- {
- TRACE1_HEADER
- TRACE1_LABEL("back_from_performing ", label, "")
- TRACE1_END
+ current_function->statement_list_stack.pop_back();
}
}
if( !proc_2 )
{
- parser_perform(proc_1, suppress_nexting);
- return;
+ proc_2 = proc_1;
}
struct cbl_proc_t *proc1 = find_procedure(proc_1);
struct cbl_proc_t *proc2 = find_procedure(proc_2);
+ size_t dispatch_index = proc2->pseudo_return_decls.size();
+
// We need to create the unnamed return address that we
// will instantiate right after the goto:
tree return_address_decl = build_decl( UNKNOWN_LOCATION,
tree return_label_expr = build1(LABEL_EXPR,
void_type_node,
return_address_decl);
- tree return_addr = gg_get_address_of(return_address_decl);
-
- //cbl_parser_mod *parser_mod_proc1 = new cbl_parser_mod;
- //cbl_parser_mod *parser_mod_proc2 = new cbl_parser_mod;
- // Put the return address of the second procedure onto the stack:
- pseudo_return_push(proc2, return_addr);
+ // Put the dispatch_index for this PERFORM onto the stack
+ pseudo_return_push(proc2, dispatch_index);
// Create the code that will launch the first procedure
- gg_insert_into_assemblerf("%s PERFORM %s THROUGH %s",
- ASM_COMMENT_START, proc_1->name, proc_2->name);
+ if( proc_1 != proc_2 )
+ {
+ gg_insert_into_assemblerf("%s PERFORM %s THROUGH %s",
+ ASM_COMMENT_START, proc_1->name, proc_2->name);
+ }
+ else
+ {
+ gg_insert_into_assemblerf("%s PERFORM %s",
+ ASM_COMMENT_START, proc_1->name);
+ }
if( !suppress_nexting )
{
// And create the return address label:
gg_append_statement(return_label_expr);
+
+ // Now we add the return location for the PERFORM to the vector of such
+ // locations for proc2:
+ proc2->pseudo_return_decls.push_back(return_address_decl);
+ }
+
+void
+parser_perform(cbl_label_t *label, bool suppress_nexting)
+ {
+ return internal_perform_through(label, NULL, suppress_nexting);
}
static void
SET_VAR_DECL(var_decl_rdigits , INT , "__gg__rdigits");
SET_VAR_DECL(var_decl_unique_prog_id , SIZE_T , "__gg__unique_prog_id");
- SET_VAR_DECL(var_decl_entry_location , VOID_P , "__gg__entry_pointer");
SET_VAR_DECL(var_decl_exit_address , VOID_P , "__gg__exit_address");
SET_VAR_DECL(var_decl_call_parameter_signature , CHAR_P , "__gg__call_parameter_signature");
SET_VAR_DECL(var_decl_call_parameter_count , INT , "__gg__call_parameter_count");
SET_VAR_DECL(var_decl_call_parameter_lengths , build_array_type(SIZE_T, NULL),
"__gg__call_parameter_lengths");
- SET_VAR_DECL(var_decl_return_code , SHORT , "__gg__data_return_code");
SET_VAR_DECL(var_decl_arithmetic_rounds_size , SIZE_T , "__gg__arithmetic_rounds_size");
SET_VAR_DECL(var_decl_arithmetic_rounds , INT_P , "__gg__arithmetic_rounds");
SET_VAR_DECL(var_decl_treeplet_4s , SIZE_T_P , "__gg__treeplet_4s" );
SET_VAR_DECL(var_decl_nop , INT , "__gg__nop" );
SET_VAR_DECL(var_decl_main_called , INT , "__gg__main_called" );
- SET_VAR_DECL(var_decl_entry_label , VOID_P , "__gg__entry_label" );
+ SET_VAR_DECL(var_decl_entry_index , SIZE_T , "__gg__entry_index" );
}
}
// We are leaving the top-level file, which means this compilation is
// done, done, done.
- // This is where we create the file-static table of PERFORM/FOLLOWING line
- // number pairs so that the GDB-COBOL debugger can know where to "return"
- // to after a NEXT is issued on a PERFORM statement.
-
- // We need to create a file-static static array of 32-bit integers. The
- // array is terminated with a {0,0} pair:
- tree array_of_int_type = build_array_type_nelts(INT, (perform_line_pairs.size()+1)*2);
- tree array_of_int = gg_define_variable( array_of_int_type,
- "_perform_line_pairs",
- vs_file_static);
- // We have the array. Now we need to build the constructor for it
- tree constr = make_node(CONSTRUCTOR);
- TREE_TYPE(constr) = array_of_int_type;
- TREE_STATIC(constr) = 1;
- TREE_CONSTANT(constr) = 1;
-
- // The first element of the array contains the number of elements to follow
- size_t i = 0;
- for(auto it : perform_line_pairs)
+ if( !hijacked )
{
+ // This is where we create the file-static table of PERFORM/FOLLOWING line
+ // number pairs so that the GDB-COBOL debugger can know where to "return"
+ // to after a NEXT is issued on a PERFORM statement.
+
+ // We need to create a file-static static array of 32-bit integers. The
+ // array is terminated with a {0,0} pair:
+ tree array_of_int_type = build_array_type_nelts(INT, (perform_line_pairs.size()+1)*2);
+ tree array_of_int = gg_define_variable( array_of_int_type,
+ "_perform_line_pairs",
+ vs_file_static);
+ // We have the array. Now we need to build the constructor for it
+ tree constr = make_node(CONSTRUCTOR);
+ TREE_TYPE(constr) = array_of_int_type;
+ TREE_STATIC(constr) = 1;
+ TREE_CONSTANT(constr) = 1;
+
+ // The first element of the array contains the number of elements to follow
+ size_t i = 0;
+ for(auto it : perform_line_pairs)
+ {
+ CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
+ build_int_cst_type(SIZE_T, i++),
+ build_int_cst_type(INT, it.first) );
+ CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
+ build_int_cst_type(SIZE_T, i++),
+ build_int_cst_type(INT, it.second) );
+ }
CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
build_int_cst_type(SIZE_T, i++),
- build_int_cst_type(INT, it.first) );
+ integer_zero_node );
CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
build_int_cst_type(SIZE_T, i++),
- build_int_cst_type(INT, it.second) );
- }
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- build_int_cst_type(SIZE_T, i++),
- integer_zero_node );
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- build_int_cst_type(SIZE_T, i++),
- integer_zero_node );
- DECL_INITIAL(array_of_int) = constr;
-
- // There is, however, one thing left to do. If the command line says
- // that this module needs a main entry point, then this is where
- // we create a main() function. We build it at the end, so that all of
- // the .loc directives associated with it appear at the end of the
- // source code. We used to create the main() entry point at the beginning,
- // but that created confusion for GDB when trying to debug the generated
- // executable.
- if( main_entry_point )
- {
- next_program_is_main = false;
- build_main_that_calls_something(main_entry_point);
- free(main_entry_point);
- main_entry_point = NULL;
+ integer_zero_node );
+ DECL_INITIAL(array_of_int) = constr;
+
+ // There is, however, one thing left to do. If the command line says
+ // that this module needs a main entry point, then this is where
+ // we create a main() function. We build it at the end, so that all of
+ // the .loc directives associated with it appear at the end of the
+ // source code. We used to create the main() entry point at the beginning,
+ // but that created confusion for GDB when trying to debug the generated
+ // executable.
+ if( main_entry_point )
+ {
+ next_program_is_main = false;
+ build_main_that_calls_something(main_entry_point);
+ free(main_entry_point);
+ main_entry_point = NULL;
+ }
}
gg_leaving_the_source_code_file();
gg_assign(current_function->first_time_through, integer_zero_node);
- // Establish variables that are function-wide in scope:
- current_function->void_star_temp = gg_define_void_star("_void_star_temp");
-
current_function->perform_exit_address
= gg_define_void_star("_perform_exit_address");
*pretval = 1;
}
+#ifdef ENABLE_HIJACKING
if( strcmp(funcname, "dubner") == 0)
{
- // This should be enabled by an environment variable.
- // But for now I am being cutesy
+ fprintf(stderr, "This is a DUBNER hijacking\n");
hijack_for_development(funcname);
return;
}
+#endif
+
enter_program_common(funcname, funcname_);
current_function->is_function = is_function;
}
} label_verify;
+static void
+build_dispatch_switch(const std::vector<tree> &label_decls)
+ {
+ // This routine accepts vector of LABEL_DECLs. It creates a
+ // switch statement that's equivalent to
+ // switch(N)
+ // {
+ // default:
+ // case 0:
+ // goto label[0];
+ // case 1:
+ // goto label[1];
+ // ...
+ // case N-1:
+ // goto label[N-1];
+ // }
+
+ // If the vector of label_decls is empty, there is no need to create the
+ // switch statement.
+
+ if( !label_decls.empty() )
+ {
+ tree switch_statement_list = make_node(STATEMENT_LIST);
+ TREE_TYPE(switch_statement_list) = void_type_node;
+
+ tree switchexpr = build2(SWITCH_EXPR,
+ integer_type_node,
+ gg_call_expr( SIZE_T,
+ "__gg__pseudo_return_pop",
+ NULL_TREE),
+ switch_statement_list);
+
+
+ gg_append_statement(switchexpr);
+ current_function->statement_list_stack.push_back(switch_statement_list);
+
+ // Start off with a "default:" case
+ tree labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+ TREE_USED(labeldecl) = 1;
+
+ tree caselabel;
+ caselabel = build_case_label(NULL_TREE,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
+
+ for(size_t i = 0; i < label_decls.size(); ++i)
+ {
+ // Start with the case label for the pseudo-return location.
+ tree val = build_int_cst(SIZE_T, i);
+
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+
+ caselabel = build_case_label(val,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
+
+ // And follow up with a goto expression for the pseudo-return location.
+ tree goto_expr = build1( GOTO_EXPR,
+ void_type_node,
+ label_decls[i]);
+ gg_append_statement(goto_expr);
+ }
+
+ current_function->statement_list_stack.pop_back();
+ }
+ }
+
+static void
+build_alter_switch(cbl_proc_t *proc, const std::vector<tree> &label_decls)
+ {
+ // This routine accepts a vector of LABEL_DECLs. It lays down code
+ // equivalent to
+ // if( label_decls.size() )
+ // {
+ // switch(N)
+ // {
+ // case 0:
+ // goto proc->no_alter_label;
+ // case 1:
+ // goto label[0];
+ // ...
+ // case N:
+ // goto label[N-1];
+ // default:
+ // }
+ // }
+ // goto proc->no_alter_label;
+
+ if( !label_decls.empty() )
+ {
+ tree switch_statement_list = make_node(STATEMENT_LIST);
+ TREE_TYPE(switch_statement_list) = void_type_node;
+
+ tree switchexpr = build2(SWITCH_EXPR,
+ integer_type_node,
+ proc->alter_index,
+ switch_statement_list);
+ gg_append_statement(switchexpr);
+ current_function->statement_list_stack.push_back(switch_statement_list);
+
+ tree caselabel;
+ tree labeldecl;
+
+ for(size_t i = 0; i < label_decls.size()+1; ++i)
+ {
+ // Start with the case label for the pseudo-return location.
+ tree val =
+ build_int_cst(TREE_TYPE(proc->alter_index), i);
+
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+
+ caselabel = build_case_label(val,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
+
+ // And follow up with a goto expression for the pseudo-return location.
+ if( i == 0 )
+ {
+ gg_append_statement(proc->no_alter_goto);
+ }
+ else
+ {
+ tree goto_expr = build1( GOTO_EXPR,
+ void_type_node,
+ label_decls[i-1]);
+ gg_append_statement(goto_expr);
+ }
+ }
+
+ // End with a fall-through with "default:" case
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+ caselabel = build_case_label(NULL_TREE,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
+
+ current_function->statement_list_stack.pop_back();
+ }
+ gg_append_statement(proc->no_alter_goto);
+
+ }
+
+static void
+build_entry_switch(const std::vector<tree> &goto_expr)
+ {
+ // This routine accepts a vector of GOTO_EXPRs. It lays down code
+ // equivalent to
+ // if( goto_expr.size() )
+ // {
+ // switch(var_decl_entry_index)
+ // {
+ // case 1:
+ // var_decl_entry_index = 0
+ // goto goto_expr[0]
+ // ...
+ // case N:
+ // var_decl_entry_index = 0
+ // goto goto_expr[N-1];
+ // default:
+ // abort();
+ // }
+ // }
+
+ if( !goto_expr.empty() )
+ {
+ tree switch_statement_list = make_node(STATEMENT_LIST);
+ TREE_TYPE(switch_statement_list) = void_type_node;
+
+ tree switchexpr = build2(SWITCH_EXPR,
+ integer_type_node,
+ var_decl_entry_index,
+ switch_statement_list);
+ gg_append_statement(switchexpr);
+ current_function->statement_list_stack.push_back(switch_statement_list);
+
+ tree caselabel;
+ tree labeldecl;
+
+ for(size_t i = 0; i < goto_expr.size(); ++i)
+ {
+ // Start with the case label for the pseudo-return location.
+ tree val = build_int_cst(SIZE_T, i+1);
+
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+
+ caselabel = build_case_label(val,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
+
+ // Each case starts out by zeroing the global index:
+ gg_assign(var_decl_entry_index, size_t_zero_node);
+ // Followed by the goto
+ gg_append_statement(goto_expr[i]);
+ }
+
+ // End with a default: case specifying an abort();
+ labeldecl = create_artificial_label(UNKNOWN_LOCATION);
+ DECL_CONTEXT(labeldecl) = current_function->function_decl;
+ caselabel = build_case_label(NULL_TREE,
+ NULL_TREE,
+ labeldecl);
+ gg_append_statement(caselabel);
+ gg_abort();
+
+ current_function->statement_list_stack.pop_back();
+ }
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+static void
+build_perform_dispatcher()
+ {
+ // This routine lays down the dispatcher that handles the return from
+ // PERFORM <proc>
+
+ // We need to create an execution island. The switch() statement will
+ // live on it.
+
+ // Create the GOTO and the LABEL for this island
+ tree island_goto;
+ tree island_label;
+ gg_create_goto_pair(&island_goto, &island_label);
+ // GOTO the far side of the island.
+ gg_append_statement(island_goto);
+
+ // We need to build N switch statements, one for each paragraph that was
+ // the target of a perform:
+
+ // The list is a vector<void *>
+ for( auto it : current_function->list_of_procedures )
+ {
+ cbl_proc_t *proc = static_cast<cbl_proc_t *>(it);
+ // Each switch statement is the target of a GOTO at the end of a
+ // paragraph. In the case of a paragraph that was never called, the
+ // code targeting the label will never be executed; the GOTO will always
+ // be skipped by the end-of-paragraph code checking the top of the pseudo-
+ // return stack. But we need the label anyway, because otherwise the
+ // middle-end Control Flow Graph CFG processing crashes.
+ gg_append_statement(proc->dispatch_switch_label);
+
+ // And after each such label, the switch statement:
+ build_dispatch_switch(proc->pseudo_return_decls);
+
+ // Do something similar for ALTER
+ gg_append_statement(proc->alter_switch_label);
+ // And after each such label, the switch statement:
+ build_alter_switch(proc, proc->alter_decls);
+ }
+ // Do something similar for ENTER
+ tree label = current_function->entry_switch_label;
+ gg_append_statement(label);
+ // And after each such label, the switch statement:
+ build_entry_switch(current_function->entry_goto_expressions);
+
+ // Lay down the label for jumping over the island.
+ gg_append_statement(island_label);
+ }
+#pragma GCC diagnostic pop
+
void
parser_end_program(const char *prog_name )
{
gcc_unreachable();
}
+ if( !hijacked )
+ {
+ build_perform_dispatcher();
+ }
if( gg_trans_unit.function_stack.size() )
{
{
if( mode_syntax_only() ) return;
+ RETURN_WHEN_HIJACKED;
+
char ach[48];
sprintf(ach,
"..variables_to_init_" HOST_SIZE_T_PRINT_DEC,
tree array = gg_trans_unit_var_decl(ach);
gg_call(VOID,
"__gg__variables_to_init",
- gg_get_address_of(array),
+ gg_pointer_to_array(array),
wsclear() ? build_string_literal(
1,
reinterpret_cast<const char *>(wsclear()))
vs_static);
DECL_INITIAL(new_var_decl) = wide_int_to_tree(var_type, value);
TREE_CONSTANT(new_var_decl) = 1;
+ TREE_READONLY(new_var_decl) = 1;
field->data_decl_node = new_var_decl;
"__gg__alphabet_create",
build_int_cst_type(INT, alphabet.encoding),
build_int_cst_type(SIZE_T, alphabet_index),
- gg_get_address_of(table256),
+ gg_pointer_to_array(table256),
build_int_cst_type(INT, low_char),
build_int_cst_type(INT, high_char),
NULL_TREE );
rounded,
check_for_error,
true);
-
gg_assign(error_flag, gg_bitwise_or(error_flag, erf));
IF(error_flag, ne_op, integer_zero_node)
{
program_end_stuff(cbl_refer_t refer,
ec_type_t ec)
{
+ // Looking for hijack here puts the hijacked code just before the
+ // exit sequence
+#ifdef ENABLE_HIJACKING
+ static bool just_once = true;
+ // We need the just_once state because this routine can be called more than
+ // once. Usually the parser handles it, but we have a "just-in-case" call
+ // in parser_end_program() that sometimes is necessary.
+ if(just_once && strcmp(current_function->our_name, "hijack") == 0)
+ {
+ just_once = false;
+ fprintf(stderr, "This is a HIJACK BEFORE EXIT scenario.\n");
+ hijacker();
+ }
+#endif
+
// This is the moral equivalent of a C "return xyz;".
// There cannot be both a non-zero exit status and an exception condition.
tree array_type = build_array_type_nelts(UCHAR,
returner->data.capacity());
tree array = gg_define_variable(array_type, vs_static);
- gg_memcpy(gg_get_address_of(array),
+ gg_memcpy(gg_pointer_to_array(array),
member(returner->var_decl_node, "data"),
member(returner->var_decl_node, "capacity"));
- tree actual = gg_cast(COBOL_FUNCTION_RETURN_TYPE, gg_get_address_of(array));
+ tree actual = gg_cast(COBOL_FUNCTION_RETURN_TYPE,
+ gg_pointer_to_array(array));
restore_local_variables();
gg_return(actual);
}
else
{
- // There is no explicit value. This means, by default (according to)
- // IBM), we return the value found in RETURN-CODE:
+ // There is no explicit value. This means, by default (according to IBM),
+ // we return the value found in RETURN-CODE:
tree value = gg_define_variable(COBOL_FUNCTION_RETURN_TYPE);
- gg_assign(value,
- gg_cast(COBOL_FUNCTION_RETURN_TYPE,
- var_decl_return_code));
+ if( !hijacked )
+ {
+ gg_assign(value,
+ gg_cast(COBOL_FUNCTION_RETURN_TYPE,
+ current_function->var_decl_return));
+ }
+ else
+ {
+ gg_assign(value, gg_cast(COBOL_FUNCTION_RETURN_TYPE, integer_zero_node));
+ }
restore_local_variables();
gg_return(gg_cast(COBOL_FUNCTION_RETURN_TYPE, value));
}
TRACE1_END
}
+ if( hijacked )
+ {
+ // We need just_once because parser_exit gets called an extra time at the
+ // end of file, just in case. That should be tracked down and handled so
+ // that it gets called only once.
+ static bool just_once = true;
+ if( just_once )
+ {
+ just_once = false;
+ tree function_type =
+ TREE_TYPE(DECL_RESULT(current_function->function_decl));
+ tree operand = gg_define_variable(function_type);
+ gg_assign(operand, build_int_cst_type(function_type, 0));
+ tree modify = build2( MODIFY_EXPR,
+ function_type,
+ DECL_RESULT(current_function->function_decl),
+ gg_cast(function_type, operand));
+ tree stmt = build1(RETURN_EXPR, void_type_node, modify);
+ gg_append_statement(stmt);
+ }
+
+ return;
+ }
+
if( refer.prog_func )
{
// We are processing EXIT PROGRAM. If main() called us, we need to do
return label->structs.goto_trees;
}
+// This routine cloned from parse_ante.h
+static inline cbl_field_t *
+register_find( const char *name ) {
+ size_t iprog = current_program_index();
+ auto found = symbol_find( iprog, std::list<const char*>(1, name) );
+ gcc_assert(found.second);
+ return cbl_field_of(found.first);
+}
+
void
parser_xml_parse( cbl_label_t *instance,
cbl_refer_t input,
gg_return(0);
gg_append_statement(island_label);
+ // We need the three xml special registers:
+ cbl_field_t *xml_event = register_find("XML-EVENT");
+ cbl_field_t *xml_code = register_find("XML-CODE");
+ cbl_field_t *xml_text = register_find("XML-TEXT");
+
// With the callback in place, we are ready to call the library:
tree pcallback = gg_get_function_address(VOID, ach);
: null_pointer_node,
build_int_cst_type(INT, returns_national),
pcallback,
+ gg_get_address_of(xml_event->var_decl_node),
+ gg_get_address_of(xml_code ->var_decl_node),
+ gg_get_address_of(xml_text ->var_decl_node),
NULL_TREE));
IF( erc, ne_op, integer_zero_node )
{
static void
initialize_the_data()
{
+ RETURN_WHEN_HIJACKED;
+
if( initialized_data )
{
return;
NULL,
vs_static);
gg_assign( member(new_var->var_decl_node, "data"),
- gg_get_address_of(data_decl_node) );
+ gg_pointer_to_array(data_decl_node) );
// And then move it into place
gg_call(VOID,
{
Analyze();
- // Do some symbol table index bookkeeping. current_program_index() is valid
- // at this point in time:
+ RETURN_WHEN_HIJACKED;
+
+ // Do some symbol table index bookkeeping. current_program_index() is
+ // valid at this point in time:
current_function->our_symbol_table_index = current_program_index();
+ const cbl_label_t *prog = cbl_label_of(symbol_at(current_program_index()));
+ current_function->has_initial = prog->initial;
+ current_function->has_recursive = prog->recursive;
// We have some housekeeping to do to keep track of the list of functions
- // accessible by us:
+ // accessible by us.
// For every procedure, we need a variable that points to the list of
// available program names.
// We need a pointer to the array of program names
char ach[2*sizeof(cbl_name_t)];
- sprintf(ach,
- "..accessible_program_list_" HOST_SIZE_T_PRINT_DEC,
- (fmt_size_t)current_function->our_symbol_table_index);
- tree prog_list = gg_define_variable(build_pointer_type(CHAR_P),
- ach, vs_file_static);
-
- // Likewise, we need a pointer to the array of pointers to functions:
- tree function_type =
- build_varargs_function_type_array( SIZE_T,
- 0, // No parameters yet
- NULL); // And, hence, no types
- tree pointer_type = build_pointer_type(function_type);
- tree constructed_array_type = build_array_type_nelts(pointer_type, 1);
- sprintf(ach,
- "..accessible_program_pointers_" HOST_SIZE_T_PRINT_DEC,
- (fmt_size_t)current_function->our_symbol_table_index);
- tree prog_pointers = gg_define_variable(
- build_pointer_type(constructed_array_type),
- ach,
- vs_file_static);
- gg_call(VOID,
- "__gg__set_program_list",
- build_int_cst_type(INT, current_function->our_symbol_table_index),
- gg_get_address_of(prog_list),
- gg_get_address_of(prog_pointers),
- NULL_TREE);
-
- if( gg_trans_unit.function_stack.size() == 1 )
+ if( !current_function->initialized )
{
- gg_create_goto_pair(&label_list_out_goto,
- &label_list_out_label);
- gg_create_goto_pair(&label_list_back_goto,
- &label_list_back_label);
- gg_append_statement(label_list_out_goto);
- gg_append_statement(label_list_back_label);
- }
+ // Do some symbol table index bookkeeping. current_program_index() is valid
+ // at this point in time:
+ current_function->our_symbol_table_index = current_program_index();
- tree globals_are_initialized = gg_declare_variable( INT,
- "__gg__globals_are_initialized",
- NULL,
- vs_external_reference);
- IF( globals_are_initialized, eq_op, integer_zero_node )
- {
- // one-time initialization happens here
+ gg_create_goto_pair(¤t_function->entry_switch_goto,
+ ¤t_function->entry_switch_label);
- // We need to establish the initial value of the UPSI-1 switch register
- // We are using IBM's conventions:
- // https://www.ibm.com/docs/en/zvse/6.2?topic=SSB27H_6.2.0/fa2sf_communicate_appl_progs_via_job_control.html
- // UPSI 10000110 means that bits 0, 5, and 6 are on, which means that
- // SW-0, SW-5, and SW-6 are on.
+ // We have some housekeeping to do to keep track of the list of functions
+ // accessible by us:
+
+ // For every procedure, we need a variable that points to the list of
+ // available program names.
+
+ // We need a pointer to the array of program names
+ sprintf(ach,
+ "..accessible_program_list_" HOST_SIZE_T_PRINT_DEC,
+ (fmt_size_t)current_function->our_symbol_table_index);
+ tree prog_list = gg_define_variable(build_pointer_type(CHAR_P),
+ ach, vs_file_static);
+
+ // Likewise, we need a pointer to the array of pointers to functions:
+ tree function_type =
+ build_varargs_function_type_array( SIZE_T,
+ 0, // No parameters yet
+ NULL); // And, hence, no types
+ tree pointer_type = build_pointer_type(function_type);
+ tree constructed_array_type = build_array_type_nelts(pointer_type, 1);
+ sprintf(ach,
+ "..accessible_program_pointers_" HOST_SIZE_T_PRINT_DEC,
+ (fmt_size_t)current_function->our_symbol_table_index);
+ tree prog_pointers = gg_define_variable(
+ build_pointer_type(constructed_array_type),
+ ach,
+ vs_file_static);
gg_call(VOID,
- "__gg__onetime_initialization",
+ "__gg__set_program_list",
+ build_int_cst_type(INT, current_function->our_symbol_table_index),
+ gg_get_address_of(prog_list),
+ gg_get_address_of(prog_pointers),
NULL_TREE);
- // And then flag one-time initialization as having been done.
- gg_assign(globals_are_initialized, integer_one_node);
+ if( gg_trans_unit.function_stack.size() == 1 )
+ {
+ gg_create_goto_pair(&label_list_out_goto,
+ &label_list_out_label);
+ gg_create_goto_pair(&label_list_back_goto,
+ &label_list_back_label);
+ gg_append_statement(label_list_out_goto);
+ gg_append_statement(label_list_back_label);
+ }
+
+ tree globals_are_initialized = gg_declare_variable( INT,
+ "__gg__globals_are_initialized",
+ NULL,
+ vs_external_reference);
+ IF( globals_are_initialized, eq_op, integer_zero_node )
+ {
+ // one-time initialization happens here
+
+ // We need to establish the initial value of the UPSI-1 switch register
+ // We are using IBM's conventions:
+ // https://www.ibm.com/docs/en/zvse/6.2?topic=SSB27H_6.2.0/fa2sf_communicate_appl_progs_via_job_control.html
+ // UPSI 10000110 means that bits 0, 5, and 6 are on, which means that
+ // SW-0, SW-5, and SW-6 are on.
+ gg_call(VOID,
+ "__gg__onetime_initialization",
+ NULL_TREE);
+
+ // And then flag one-time initialization as having been done.
+ gg_assign(globals_are_initialized, integer_one_node);
+ }
+ ELSE
+ ENDIF
}
- ELSE
- ENDIF
gg_append_statement(current_function->skip_init_label);
// This is where we check to see if somebody tried to cancel us
// gg_printf("Somebody wants to cancel %s\n",
// gg_string_literal(current_function->our_unmangled_name),
// NULL_TREE);
- const cbl_label_t *prog = cbl_label_of(symbol_at(current_program_index()));
size_t initializer_index = prog->initial_section;
cbl_label_t *initializer = cbl_label_of(symbol_at(initializer_index));
parser_perform(initializer, true); // true means suppress nexting
// Stash the returning variables for use during parser_return()
current_function->returning = returning;
+ current_function->var_decl_return =
+ gg_indirect(gg_cast(SHORT_P,
+ member(cbl_field_of(symbol_at(return_code_register()))->var_decl_node,
+ "data")));
+
if( gg_trans_unit.function_stack.size() == 1 )
{
- // We are entering a new top-level program, so we need to set
- // RETURN-CODE to zero
- gg_assign(var_decl_return_code, build_int_cst_type(SHORT, 0));
+ // We are entering a new top-level program.
+
+ if( current_function->has_initial || current_function->has_recursive )
+ {
+ // According to the IBM COBOL Language Specification, there is a list
+ // of special registers that get cleared to zero or spaces when a
+ // program has the INITIAL or RECURSIVE attribute.
+ gg_assign(current_function->var_decl_return,
+ build_int_cst_type(SHORT, 0));
+ }
}
// The parameters passed to this program might be 64 bits or 128 bits in
// It is at this point that we check to see if the call to this function
// is a re-entry because of an ENTRY statement:
- IF( var_decl_entry_label, ne_op, null_pointer_node )
+ IF(var_decl_entry_index, ne_op, size_t_zero_node)
{
// This is an ENTRY re-entry. The processing of USING variables was
- // done in parser_entry, so now we jump to the label
- static tree loc = gg_define_variable(VOID_P, vs_static);
- gg_assign(loc, var_decl_entry_label);
- gg_assign(var_decl_entry_label, gg_cast(VOID_P, null_pointer_node));
- gg_goto(loc);
+ // done in parser_entry, so now we jump to the switch statement
+ gg_append_statement(current_function->entry_switch_goto);
}
ELSE
{
}
- ENDIF
+ ENDIF
+
+ current_function->pseudo_return_index =
+ gg_define_variable(SIZE_T, "_pseudo_return_index", vs_static);
+ // Establish the formal parameters from the USING clause.
establish_using(nusing, args);
+
+ current_function->initialized = true;
}
}
}
else
{
- gg_assign(returned_value, gg_cast(INT, var_decl_return_code));
+ gg_assign(returned_value, gg_cast(INT, current_function->var_decl_return));
TRACE1
{
gg_fprintf( trace_handle,
TRACE1_END
}
+ RETURN_WHEN_HIJACKED;
+
CHECK_LABEL(label);
label_verify.lay(label);
TRACE1_END
}
+ RETURN_WHEN_HIJACKED;
+
CHECK_LABEL(label);
label_verify.go_to(label);
gcc_assert(tgt);
cbl_field_t *count = how_many.field;
- if( how_many.is_reference() )
- {
- cbl_internal_error("%s:%d: ignoring subscripts", __func__, __LINE__);
- }
CHECK_FIELD(count);
// This has to be on the stack, because performs can be nested
get_binary_value( counter,
NULL,
count,
- size_t_zero_node);
+ refer_offset(how_many));
SHOW_PARSE
{
build_array_of_treeplets(1, pcbl_index, pcbl_refers.data());
// Do the actual call:
- gg_call(VOID,
- "__gg__inspect_format_1",
- backward ? integer_one_node : integer_zero_node,
- integers,
- NULL_TREE);
+ charmap_t *charmap = __gg__get_charmap(identifier_1.field->codeset.encoding);
+ if( charmap->stride() == 1 && !charmap->is_like_utf8() )
+ {
+ // The variables are ASCII or EBCDIC
+ gg_call(VOID,
+ "__gg__inspect_format_1_sbc",
+ backward ? integer_one_node : integer_zero_node,
+ integers,
+ NULL_TREE);
+ }
+ else
+ {
+ gg_call(VOID,
+ "__gg__inspect_format_1",
+ backward ? integer_one_node : integer_zero_node,
+ integers,
+ NULL_TREE);
+ }
}
static void
}
else
{
- // Because no explicit returning value is expected, we just call it. We
- // expect COBOL routines to set RETURN-CODE when they think it necessary.
+ // Because no explicit returning value is expected, we call the designated
+ // function and assign the return value to our RETURN-CODE
push_program_state();
- gg_append_statement(call_expr);
+ gg_assign(current_function->var_decl_return, gg_cast(SHORT, call_expr));
pop_program_state();
}
assert(iprog == symbol_elem_of(declarative)->program);
}
-static tree entry_goto;
-static tree entry_label;
-static tree entry_addr;
-
void
parser_entry( const cbl_field_t *name, size_t nusing, cbl_ffi_arg_t *args )
{
// Create a goto/label pair. The label will be set up here; the goto will
// be used when we re-enter the containing function:
+ tree entry_goto;
+ tree entry_label;
+
gg_create_goto_pair(&entry_goto,
- &entry_label,
- &entry_addr);
+ &entry_label);
+
+ size_t entry_index = current_function->entry_goto_expressions.size()+1;
+ current_function->entry_goto_expressions.push_back(entry_goto);
// Start creating the ENTRY function.
tree function_decl = gg_define_function( VOID,
// Put the entry_label into the global variable that will be picked up
// when the containing program-id is re-entered:
- gg_assign(var_decl_entry_label, entry_addr);
+ gg_assign(var_decl_entry_index, build_int_cst_type(SIZE_T, entry_index));
// Get the function address of the containing function.
tree gfa = gg_get_function_address(VOID, name_of_parent);
// We are done with the ENTRY function:
gg_finalize_function();
- // Lay down the address of the label that matches var_decl_entry_label;
+ // Lay down the address of the label that matches var_decl_entry_index;
// the containing program-id will jump to this point.
gg_append_statement(entry_label);
}
SHOW_PARSE_END
}
+ RETURN_WHEN_HIJACKED;
+
// This needs to be an island that doesn't execute in-line. This is necessary
// when there isn't a GOBACK or GOTO or STOP RUN at the point where a
// [possibly implicit] PROGRAM END is encountered
sprintf(ach, "..accessible_program_list_" HOST_SIZE_T_PRINT_DEC,
(fmt_size_t)caller);
tree accessible_list_var_decl = gg_trans_unit_var_decl(ach);
- gg_assign( accessible_list_var_decl, gg_get_address_of(the_names_table) );
+ gg_assign( accessible_list_var_decl, gg_pointer_to_array(the_names_table) );
sprintf(ach, "..accessible_program_pointers_" HOST_SIZE_T_PRINT_DEC,
(fmt_size_t)caller);
tree accessible_programs_decl = gg_trans_unit_var_decl(ach);
- gg_assign( accessible_programs_decl, gg_get_address_of(the_constructed_table) );
+ gg_assign( accessible_programs_decl, gg_pointer_to_array(the_constructed_table) );
callers.insert(caller);
}
}
}
+#ifdef ENABLE_HIJACKING
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+static tree
+build_temporaryN(int N)
+ {
+ // Creates a typical FldNumericBin5 intermediate.
+ char achName[32];
+ sprintf(achName,"_funky_%d", N);
+ char *pszdata = xasprintf("_funky%d_data", N);
+ size_t bytes_to_allocate = 16;
+ gg_variable_scope_t vs_scope = vs_stack;
+ tree array_type = build_array_type_nelts(UCHAR, bytes_to_allocate);
+ tree data_decl_node = gg_define_variable(
+ array_type,
+ pszdata,
+ vs_scope);
+//// data_decl_node = null_pointer_node;
+ free(pszdata);
+
+ // This is the holy grail. With the initializer set to gg_pointer_to_array,
+ // we get N-squared behavior. Set to null_pointer_node, linear.
+ tree data_area = null_pointer_node;
+ if( data_decl_node != null_pointer_node )
+ {
+ data_area = gg_pointer_to_array(data_decl_node);
+ }
+
+ char *psz = xasprintf("_funky%d", N);
+ tree cobfield = gg_define_variable(cblc_field_type_node, psz, vs_stack);
+ free(psz);
+
+ tree data = null_pointer_node; // UCHAR_P, "data",
+ tree capacity = build_int_cst_type(SIZE_T, 16); // SIZE_T, "capacity",
+ tree allocated = build_int_cst_type(SIZE_T, 16); // SIZE_T, "allocated",
+ tree offset = build_int_cst_type(SIZE_T, 0); // SIZE_T, "offset",
+ tree name = gg_string_literal(achName); // CHAR_P, "name",
+ tree picture = gg_string_literal(""); // CHAR_P, "picture",
+ tree initial = null_pointer_node; // CHAR_P, "initial",
+ tree parent = null_pointer_node; // CHAR_P, "parent",
+ tree occurs_lower = build_int_cst_type(SIZE_T, 0); // SIZE_T, "occurs_lower",
+ tree occurs_upper = build_int_cst_type(SIZE_T, 0); // SIZE_T, "occurs_upper");
+ tree attr = build_int_cst_type(SIZE_T, intermediate_e); // SIZE_T, "attr",
+ tree type = build_int_cst_type(SCHAR, FldNumericBin5); // SCHAR, "type",
+ tree level = build_int_cst_type(SCHAR, 0); // SCHAR, "level",
+ tree digits = build_int_cst_type(SCHAR, 0); // SCHAR, "digits",
+ tree rdigits = build_int_cst_type(SCHAR, 0); // SCHAR, "rdigits",
+ tree tencoding = build_int_cst_type(INT, 111); // INT, "encoding",
+ tree alphabet = build_int_cst_type(INT, 0); // INT, "alphabet",
+
+ gg_structure_type_constructor(
+ cobfield,
+ data , // UCHAR_P, "data",
+ capacity, // SIZE_T, "capacity",
+ allocated, // SIZE_T, "allocated",
+ offset, // SIZE_T, "offset",
+ name, // CHAR_P, "name",
+ picture, // CHAR_P, "picture",
+ initial, // CHAR_P, "initial",
+ parent, // CHAR_P, "parent",
+ occurs_lower, // SIZE_T, "occurs_lower",
+ occurs_upper, // SIZE_T, "occurs_upper");
+ attr, // SIZE_T, "attr",
+ type, // SCHAR, "type",
+ level, // SCHAR, "level",
+ digits, // SCHAR, "digits",
+ rdigits, // SCHAR, "rdigits",
+ tencoding, // INT, "encoding",
+ alphabet); // INT, "alphabet",
+
+ if( data_decl_node != null_pointer_node )
+ {
+ gg_call(VOID,
+ "__gg__set_data_member",
+ gg_get_address_of(cobfield),
+ data_area,
+ NULL_TREE);
+ }
+
+ return cobfield;
+ }
+#pragma GCC diagnostic pop
+
static void
hijack_for_development(const char *funcname)
{
- /*
+ static const int N = 10000;
+ /* This routine is designed to allow the creation of a program-id program
+ without requiring the parser to supply parser_xxx calls.
- To make sure that things like global symbols and whatnot get initialized, you
- should probably create a source file that looks like this:
-
- identification division.
- program-id. prog.
- procedure division.
- call "dubner".
- end program prog.
- identification division.
- program-id. dubner.
- procedure division.
- goback.
- end program dubner.
-
- The first program will cause all of the parser_enter_program() and
- parser_division(procedure_div_e) stuff to be initialized. The second program,
- named "dubner", will be hijacked and bring you here. */
+ When your source code is a "program-id. dubner.", this routine gets
+ generated instead of the one in the source.
+ */
+ hijacked = true;
+ funcname = "main";
// Assume that funcname is lowercase with no hyphens
- enter_program_common(funcname, funcname);
+ gg_define_function(COBOL_FUNCTION_RETURN_TYPE,
+ funcname,
+ funcname,
+ NULL_TREE);
+
parser_display_literal("You have been hijacked by a program named \"dubner\"");
- gg_insert_into_assemblerf("%s HIJACKED DUBNER CODE START", ASM_COMMENT_START);
+ gg_insert_into_assemblerf("%s HIJACKED CODE START", ASM_COMMENT_START);
+
- for(int i=0; i<10; i++)
+ tree xxx = gg_define_int("xxx");
+ tree yyy = gg_define_int("yyy");
+ tree zzz = gg_define_int("zzz");
+
+ fprintf(stderr, "N is %d\n", N);
+ for(int i=0; i<N; i++)
{
- char ach[64];
- sprintf(ach, "Hello, world - %d", i+1);
+ IF( gg_bitwise_and(xxx, integer_one_node), ne_op, integer_zero_node )
+ {
+ gg_assign(yyy, xxx);
+ }
+ ELSE
+ {
+ gg_assign(zzz, xxx);
+ }
+ ENDIF
+ }
- gg_call(VOID,
- "puts",
- build_string_literal(strlen(ach)+1, ach),
- NULL_TREE);
+ gg_insert_into_assemblerf("%s HIJACKED CODE END", ASM_COMMENT_START);
+ }
+
+static void
+hijacker()
+ {
+ /* The code here is activated when the program-id is "hijack". It's not
+ really a hijacking; all of the code in the "hijack" program gets laid
+ down. The code here is injected just prior to the parser_exit() stuff
+ in the COBOL source code. */
+
+ parser_display_literal("You have been hijacked by a program named \"hijack\"");
+ gg_insert_into_assemblerf("%s HIJACKED CODE START", ASM_COMMENT_START);
+
+#if 0
+
+ cbl_field_t *faaa = register_find("aaa");
+ cbl_field_t *fbbb = register_find("bbb");
+ cbl_field_t *fddd = register_find("ddd");
+ cbl_field_t *fxxx = register_find("xxx");
+
+ cbl_refer_t aaa(faaa);
+ cbl_refer_t bbb(fbbb);
+ cbl_refer_t ddd(fddd);
+
+ fxxx->var_decl_node = build_temporaryN(0);
+
+ static const int N = 1000;
+ fprintf(stderr, "N is %d\n", N);
+ for(int i=0; i<N; i++)
+ {
+ parser_op(ddd,
+ aaa,
+ '+',
+ bbb,
+ NULL);
}
+#endif
- gg_insert_into_assemblerf("%s HIJACKED DUBNER CODE END", ASM_COMMENT_START);
- gg_return(0);
+ gg_insert_into_assemblerf("%s HIJACKED CODE END", ASM_COMMENT_START);
}
+#endif
static void
conditional_abs(tree source, const cbl_field_t *field)
tree immediate_parent,
tree new_var_decl)
{
- tree constr = make_node(CONSTRUCTOR);
- TREE_TYPE(constr) = cblc_field_type_node;
- TREE_STATIC(constr) = 1;
- TREE_CONSTANT(constr) = 1;
-
- tree next_field = TYPE_FIELDS(cblc_field_type_node);
- // We are going to create the constructors by walking the linked
- // list of FIELD_DECLs. We must do it in the same order as the
- // structure creation code in create_cblc_field_t()
+ // There is a bug in the GCC compiler. For some optimizations and some
+ // settings of -fpie, pathological N-squared time in the middle end can
+ // happen when a structure on the stack has an initialized member pointing
+ // to another memory area on the stack. In those cases, we are going to
+ // initialize the pointer to zero, and then call a function to initialize
+ // the data member. That hides things from the compiler's optimization
+ // phases.
- // UCHAR_P, "data",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- data_area );
- next_field = TREE_CHAIN(next_field);
-
- // SIZE_T, "capacity",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type( SIZE_T,
- new_var->data.capacity()) );
- next_field = TREE_CHAIN(next_field);
-
- // SIZE_T, "allocated",
- if( data_area != null_pointer_node )
+ bool read_only = !!TREE_READONLY(new_var_decl);
+ if( new_var->type == FldLiteralN )
{
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type( SIZE_T,
- new_var->data.capacity()) );
+ // For a FldLiteralN the new_var_decl is a number, not a
+ // a cblc_field_t.
+ read_only = true;
}
- else
- {
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type( SIZE_T,
- 0) );
- }
-
- next_field = TREE_CHAIN(next_field);
-
- // SIZE_T, "offset",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SIZE_T, new_var->offset) );
-
- next_field = TREE_CHAIN(next_field);
- // CHAR_P, "name",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- gg_string_literal(new_var->name) );
- next_field = TREE_CHAIN(next_field);
-
- // CHAR_P, "picture",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- gg_string_literal(new_var->data.picture) );
- next_field = TREE_CHAIN(next_field);
-
- // CHAR_P, "initial",
- if( length_of_initial_string == 0 || !new_var->data.has_initial_value() )
+ if( new_var->type == FldAlphanumeric && new_var->attr & intermediate_e )
{
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- null_pointer_node );
+ // We need not to mess with the intermediate malloc() logic.
+ read_only = true;
}
- else
+
+ if( new_var->attr & external_e )
{
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_string_literal(length_of_initial_string, new_initial) );
+ // We need not to mess with the intermediate malloc() logic.
+ read_only = true;
}
- next_field = TREE_CHAIN(next_field);
-
- // CHAR_P, "parent",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- immediate_parent ? gg_get_address_of(immediate_parent) : null_pointer_node );
- next_field = TREE_CHAIN(next_field);
- // SIZE_T, "occurs_lower",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SIZE_T, new_var->occurs.bounds.lower) );
- next_field = TREE_CHAIN(next_field);
-
- // SIZE_T, "occurs_upper");
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SIZE_T, new_var->occurs.bounds.upper) );
- next_field = TREE_CHAIN(next_field);
-
- // SIZE_T, "attr",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SIZE_T, new_var->attr) );
- next_field = TREE_CHAIN(next_field);
-
- // SCHAR, "type",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SCHAR, new_var->type) );
- next_field = TREE_CHAIN(next_field);
-
- // SCHAR, "level",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SCHAR, new_var->level) );
- next_field = TREE_CHAIN(next_field);
-
- // SCHAR, "digits",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SCHAR, new_var->data.digits) );
- next_field = TREE_CHAIN(next_field);
-
- // SCHAR, "rdigits",
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SCHAR, new_var->data.rdigits) );
- next_field = TREE_CHAIN(next_field);
-
- // INT, "encoding",
// For FldLiteralN we force the encoding to be ASCII.
// See initial_from_initial() for an explanation.
// For FldClass, we force the encoding to be UTF32; see
-
cbl_encoding_t encoding;
if( new_var->type == FldLiteralN )
{
encoding = new_var->codeset.encoding;
}
- CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(INT, encoding));
- next_field = TREE_CHAIN(next_field);
-
- // INT, "alphabet",
- CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(INT, new_var->codeset.alphabet));
- next_field = TREE_CHAIN(next_field);
-
- DECL_INITIAL(new_var_decl) = constr;
+ tree data = data_area ;
+ tree capacity = build_int_cst_type( SIZE_T, new_var->data.capacity());
+ tree allocated;
+ if( data_area != null_pointer_node )
+ {
+ allocated = build_int_cst_type(SIZE_T, new_var->data.capacity());
+ }
+ else
+ {
+ allocated = build_int_cst_type(SIZE_T, 0) ;
+ }
+ tree offset = build_int_cst_type(SIZE_T, new_var->offset);
+ tree name = gg_string_literal(new_var->name);
+ tree picture = gg_string_literal(new_var->data.picture);
+ tree initial;
+ if( length_of_initial_string == 0 || !new_var->data.has_initial_value() )
+ {
+ initial = null_pointer_node;
+ }
+ else
+ {
+ initial = build_string_literal(length_of_initial_string, new_initial);
+ }
+ tree parent = immediate_parent ? gg_get_address_of(immediate_parent)
+ : null_pointer_node ;
+ tree occurs_lower = build_int_cst_type(SIZE_T, new_var->occurs.bounds.lower);
+ tree occurs_upper = build_int_cst_type(SIZE_T, new_var->occurs.bounds.upper);
+ tree attr = build_int_cst_type(SIZE_T, new_var->attr) ;
+ tree type = build_int_cst_type(SCHAR, new_var->type) ;
+ tree level = build_int_cst_type(SCHAR, new_var->level) ;
+ tree digits = build_int_cst_type(SCHAR, new_var->data.digits) ;
+ tree rdigits = build_int_cst_type(SCHAR, new_var->data.rdigits) ;
+ tree tencoding = build_int_cst_type(INT, encoding);
+ tree alphabet = build_int_cst_type(INT, new_var->codeset.alphabet);
+
+ if( !read_only )
+ {
+ data = null_pointer_node;
+ }
+
+ gg_structure_type_constructor(
+ new_var_decl,
+ data , // UCHAR_P, "data",
+ capacity, // SIZE_T, "capacity",
+ allocated, // SIZE_T, "allocated",
+ offset, // SIZE_T, "offset",
+ name, // CHAR_P, "name",
+ picture, // CHAR_P, "picture",
+ initial, // CHAR_P, "initial",
+ parent, // CHAR_P, "parent",
+ occurs_lower, // SIZE_T, "occurs_lower",
+ occurs_upper, // SIZE_T, "occurs_upper");
+ attr, // SIZE_T, "attr",
+ type, // SCHAR, "type",
+ level, // SCHAR, "level",
+ digits, // SCHAR, "digits",
+ rdigits, // SCHAR, "rdigits",
+ tencoding, // INT, "encoding",
+ alphabet); // INT, "alphabet",
+
+
+ if( !read_only && data_area != null_pointer_node )
+ {
+ gg_call(VOID,
+ "__gg__set_data_member",
+ gg_get_address_of(new_var_decl),
+ data_area,
+ NULL_TREE);
+ }
}
static void
field->var_decl_node = gg_define_variable( cblc_field_type_node,
ach,
vs_file_static);
+ TREE_READONLY(field->var_decl_node) = 1;
+ TREE_USED(field->var_decl_node) = 1;
+ TREE_STATIC(field->var_decl_node) = 1;
+ DECL_PRESERVE_P (field->var_decl_node) = 1;
+
actually_create_the_static_field(
field,
converted,
field->data.original(),
NULL_TREE,
field->var_decl_node);
- TREE_READONLY(field->var_decl_node) = 1;
- TREE_USED(field->var_decl_node) = 1;
- TREE_STATIC(field->var_decl_node) = 1;
- DECL_PRESERVE_P (field->var_decl_node) = 1;
}
void
NULL,
vs_stack);
gg_assign( member(new_var->var_decl_node, "data"),
- gg_get_address_of(data_decl_node) );
+ gg_pointer_to_array(data_decl_node) );
}
cbl_refer_t wrapper;
wrapper.field = new_var;
SHOW_PARSE_END
}
+ RETURN_WHEN_HIJACKED;
+
if( new_var->level == 1 && new_var->occurs.bounds.upper )
{
if( new_var->data.memsize < new_var->data.capacity() * new_var->occurs.bounds.upper )
array_type,
achDataName,
vs_external);
- data_area = gg_get_address_of(new_var->data_decl_node);
+ data_area = gg_pointer_to_array(new_var->data_decl_node);
goto actual_allocate;
}
array_type,
achDataName,
vs_external);
- data_area = gg_get_address_of(new_var->data_decl_node);
+ data_area = gg_pointer_to_array(new_var->data_decl_node);
}
else
{
array_type,
achDataName,
vs_scope);
- data_area = gg_get_address_of(new_var->data_decl_node);
+ data_area = gg_pointer_to_array(new_var->data_decl_node);
}
}
}
#include "cobol-system.h"
#include "coretypes.h"
#include "tree.h"
+#include "langhooks.h"
#include "tree-iterator.h"
#include "stringpool.h"
#include "cgraph.h"
+#include "stor-layout.h"
#include "toplev.h"
#include "function.h"
#include "fold-const.h"
// ./libcpp/include/line-map.h
// ./libcpp/location-example.txt
+#if 0
+ if( TREE_CODE(stmt) == GOTO_EXPR )
+ {
+ fprintf(stderr, "Laying down a GOTO\n");
+ }
+#endif
+
gcc_assert( gg_trans_unit.function_stack.size() );
TREE_SIDE_EFFECTS(stmt) = 1; // If an expression has no side effects,
char *
gg_show_type(tree type)
{
+ tree original_type = type;
if( !type )
{
cbl_internal_error("The given type is NULL, and that is just not fair");
cbl_internal_error("Unknown type %d", TREE_CODE(type));
}
+ if( DECL_P(original_type) && TREE_STATIC(original_type) )
+ {
+ strcat(ach, " static");
+ }
+
+ if( DECL_P(original_type) && TREE_READONLY(original_type) )
+ {
+ strcat(ach, " readonly");
+ }
+
return ach;
}
}
tree
-gg_find_field_in_struct(const tree base, const char *field_name)
+gg_get_structure_type_decl(const char *type_name, ...)
{
- // Finds and returns the field_decl for the named member. 'base' can be
- // a structure or a pointer to a structure.
- tree type = TREE_TYPE(base);
- tree rectype;
- if( POINTER_TYPE_P (type) )
- {
- tree pointer_type = TREE_TYPE(base);
- rectype = TREE_TYPE(pointer_type);
- }
- else
- {
- // Assuming a struct (or union), pick up the record_type
- rectype = TREE_TYPE(base);
- }
+ tree record_type = make_node (RECORD_TYPE);
- tree id_of_field = get_identifier(field_name);
+ tree type_decl = build_decl(UNKNOWN_LOCATION,
+ TYPE_DECL,
+ get_identifier (type_name),
+ record_type);
+ TYPE_NAME (record_type) = type_decl;
+ TYPE_STUB_DECL (record_type) = type_decl;
+ DECL_ARTIFICIAL (type_decl) = 1;
- tree field_decl = NULL_TREE;
+ va_list ap;
+ va_start (ap, type_name);
- tree next_value = TYPE_FIELDS(rectype);
+ tree first = NULL_TREE;
+ tree *link = &first;
- // Look through the chain of fields for a match to ours. This is, in the
- // limit, an O(N^2) computational burden. But structures usually small, so we
- // probably don't have to figure out how to make it faster.
- while( next_value )
+ for (;;)
{
- if( DECL_NAME(next_value) == id_of_field )
+ tree arg_type = va_arg (ap, tree);
+ if (!arg_type)
{
- field_decl = next_value;
break;
}
- next_value = TREE_CHAIN(next_value);
- }
-
- if( !field_decl )
- {
- cbl_internal_error("Somebody asked for the field %s.%s, which does not exist",
- IDENTIFIER_POINTER(DECL_NAME(base)),
- field_name);
- }
-
- return field_decl;
- }
-
-static tree
-gg_start_building_a_union(const char *type_name, tree type_context)
- {
- // type_context is current_function->function_decl for union local
- // to a function.
-
- // It is translation_unit_decl for unions common to all functions
- // We want to return the type_decl for an empty union
+ const char *member_name = va_arg (ap, const char *);
- // First, create the record_type whose values will eventually
- // be the chain of of the struct's fields:
+ tree member_decl = build_decl (UNKNOWN_LOCATION,
+ FIELD_DECL,
+ get_identifier (member_name),
+ arg_type);
- tree uniontype = make_node(UNION_TYPE);
- TYPE_CONTEXT(uniontype) = type_context;
- TYPE_SIZE_UNIT(uniontype) = integer_zero_node;
- TYPE_SIZE(uniontype) = integer_zero_node;
- TYPE_NAME(uniontype) = get_identifier(type_name);
-
- TYPE_MODE_RAW(uniontype) = TYPE_MODE (intTI_type_node);
-
- // We need a type_decl for the record_type:
- tree typedecl = make_node(TYPE_DECL);
-
- // The type of the type_decl is the record_type:
- TREE_TYPE(typedecl) = uniontype;
-
- SET_TYPE_ALIGN(uniontype, 16);
-
- // The chain element of the record_type points back to the type_decl:
- TREE_CHAIN(uniontype) = typedecl;
-
- return typedecl;
- }
-
-static tree
-gg_start_building_a_struct(const char *type_name, tree type_context)
- {
- // type_context is current_function->function_decl for structures local
- // to a function.
-
- // It is translation_unit_decl for structures common to all functions
-
- // We want to return the type_decl for an empty struct
-
- // First, create the record_type whose values will eventually
- // be the chain of of the struct's fields:
-
- tree recordtype = make_node(RECORD_TYPE);
- TYPE_CONTEXT(recordtype) = type_context;
- TYPE_SIZE_UNIT(recordtype) = integer_zero_node;
- TYPE_SIZE(recordtype) = integer_zero_node;
- TYPE_NAME(recordtype) = get_identifier(type_name);
-
- TYPE_MODE_RAW(recordtype) = BLKmode;
-
- // We need a type_decl for the record_type:
- tree typedecl = make_node(TYPE_DECL);
+ DECL_CONTEXT (member_decl) = record_type;
+ *link = member_decl;
+ link = &DECL_CHAIN (member_decl);
+ }
+ va_end (ap);
- // The type of the type_decl is the record_type:
- TREE_TYPE(typedecl) = recordtype;
+ TYPE_FIELDS (record_type) = first;
- SET_TYPE_ALIGN(recordtype, 8);
+ layout_type (record_type);
+// lang_hooks.decls.pushdecl (type_decl);
- // The chain element of the record_type points back to the type_decl:
- TREE_CHAIN(recordtype) = typedecl;
+ gcc_assert (TREE_CODE (record_type) == RECORD_TYPE);
+ gcc_assert (TYPE_NAME (record_type));
+ gcc_assert (TREE_CODE (TYPE_NAME (record_type)) == TYPE_DECL);
+ gcc_assert (TREE_TYPE (TYPE_NAME (record_type)) == record_type);
- return typedecl;
+ return record_type;
}
-static void
-gg_add_field_to_structure(const tree type_of_field, const char *name_of_field, tree struct_type_decl)
+void
+gg_structure_type_constructor(tree record_decl, ...)
{
- // We're given the struct_type_decl.
- // Append the new field to that type_decl's record_type's chain:
- tree struct_record_type = TREE_TYPE(struct_type_decl);
-
- bool is_union = TREE_CODE((struct_record_type)) == UNION_TYPE;
-
- tree id_of_field = get_identifier (name_of_field);
-
- // Create the new field:
- tree new_field_decl = build_decl( gg_token_location(),
- FIELD_DECL,
- id_of_field,
- type_of_field);
-
- // Establish the machine mode for the field_decl:
- SET_DECL_MODE(new_field_decl, TYPE_MODE(type_of_field));
-
- // Establish the context of the new field as being the record_type
- DECL_CONTEXT (new_field_decl) = struct_record_type;
+ // Given a record_decl and a NULL_TREE-terminated list of initial values, one
+ // for each member of the record_decl's type, this routine constructs and
+ // applies the constructor for it.
- // Establish the size of the new field as being the same as its prototype:
- DECL_SIZE(new_field_decl) = TYPE_SIZE(type_of_field); // This is in bits
- DECL_SIZE_UNIT(new_field_decl) = TYPE_SIZE_UNIT(type_of_field); // This is in bytes
+ // Note that the NULL_TREE terminator is not actually accessed if the list
+ // of values equal to (or greater than) the number of elements in the
+ // record_type. But it's there to allow an early termination.
- // We need to establish the offset and bit offset of the new node.
- // Empirically, this seems to be done on 16-bit boundaries, with DECL_FIELD_OFFSET
- // in units of N*16 bytes, and FIELD_BIT_OFFSET being offsets in bits from the DECL_FIELD_OFFSET
+ // If the list is too short and is not terminated, then the behavior is
+ // unpredictable.
+ tree record_type = TREE_TYPE(record_decl);
- // We calculate our desired offset in bits:
-
- // Pick up the current size, in bytes, of the record_type:
- long offset_in_bytes = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(struct_record_type));
-
- static const int MAGIC_NUMBER_SIXTEEN = 16 ;
- static const int BITS_IN_A_BYTE = 8 ;
-
- // We know the offset_in_bytes, which is the size, of the structure with
- // its current members.
-
- //long type_size = TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type_of_field));
- long type_align_in_bits = TYPE_ALIGN(type_of_field);
- long type_align_in_bytes = type_align_in_bits/BITS_IN_A_BYTE;
-
- // As per the Amd64 ABI, we need to set the structure's type alignment to be
- // that of most strictly aligned component:
- // This is the current restriction:
- long struct_align_in_bits = TYPE_ALIGN(TREE_TYPE(struct_type_decl));
- if( type_align_in_bits > struct_align_in_bits )
+ int top_level_members = 0;
+ for(tree f = TYPE_FIELDS(record_type); f; f = TREE_CHAIN(f))
{
- // The new one is the new champion
- SET_TYPE_ALIGN(TREE_TYPE(struct_type_decl), type_align_in_bits );
+ top_level_members += 1;
}
- // We know struct_type_decl is a record_type, so we can sneak through this comparison
- if( type_of_field == TREE_TYPE(struct_type_decl) )
- {
- printf(" It is a record_type\n");
- }
+ vec<constructor_elt, va_gc> *elts = NULL;
+ tree next_field = TYPE_FIELDS(record_type);
- // Bump up the offset until we are aligned:
- while( offset_in_bytes % type_align_in_bytes)
- {
- offset_in_bytes += 1;
- }
+ va_list ap;
+ va_start (ap, record_decl);
- if( is_union )
- {
- // Turn that into the bytes/bits offsets of the new field:
- DECL_FIELD_OFFSET(new_field_decl) = build_int_cst_type (SIZE_T, 0);
- DECL_FIELD_BIT_OFFSET(new_field_decl) = build_int_cst_type (bitsizetype, 0);
+ // We are going to create the constructors by walking the linked
+ // list of FIELD_DECLs. We must do it in the same order as the
+ // structure creation code in create_cblc_field_t()
- // The size of a union is the size of its largest member:
- offset_in_bytes = std::max(offset_in_bytes, (long)TREE_INT_CST_LOW(DECL_SIZE_UNIT(new_field_decl)));
- }
- else
+ int index = 0;
+ while(index < top_level_members)
{
- // Turn that into the bytes/bits offsets of the new field:
- long field_offset = (offset_in_bytes/MAGIC_NUMBER_SIXTEEN)*MAGIC_NUMBER_SIXTEEN;
- long field_bit_offset = (offset_in_bytes - field_offset) * BITS_IN_A_BYTE;
- DECL_FIELD_OFFSET(new_field_decl) = build_int_cst_type (SIZE_T, field_offset);;
- DECL_FIELD_BIT_OFFSET(new_field_decl) = build_int_cst_type (bitsizetype, field_bit_offset);
-
- // This was done empirically to make our generated code match that of a C program
- SET_DECL_OFFSET_ALIGN(new_field_decl, 128);
+ tree value = va_arg (ap, tree);
+ if( !value )
+ {
+ break;
+ }
- // And now we need to update the size of the record type:
- offset_in_bytes += TREE_INT_CST_LOW(DECL_SIZE_UNIT(new_field_decl));
+ CONSTRUCTOR_APPEND_ELT( elts,
+ next_field,
+ value );
+ next_field = DECL_CHAIN(next_field);
+ index += 1;
}
+ va_end (ap);
- TYPE_SIZE_UNIT(struct_record_type) = build_int_cst_type (SIZE_T, offset_in_bytes); // In bytes
- TYPE_SIZE(struct_record_type) = build_int_cst_type (bitsizetype, offset_in_bytes*BITS_IN_A_BYTE); // In bits
-
- if( !TYPE_FIELDS(struct_record_type) )
- {
- // This is the first variable of the chain:
- TYPE_FIELDS(struct_record_type) = new_field_decl;
- }
- else
- {
- // We need to tack the new one onto an already existing chain:
- chainon(TYPE_FIELDS(struct_record_type), new_field_decl);
- }
+ tree constr = build_constructor (record_type, elts);
+ DECL_INITIAL(record_decl) = constr;
}
-void
-gg_get_struct_type_decl(tree struct_type_decl, int count, va_list params)
+tree
+gg_find_field_in_struct(const tree base, const char *field_name)
{
- while( count-- )
+ // Finds and returns the field_decl for the named member. 'base' can be
+ // a structure or a pointer to a structure.
+ tree type = TREE_TYPE(base);
+ tree rectype;
+ if( POINTER_TYPE_P (type) )
{
- tree field_type = va_arg(params, tree);
- const char *name = va_arg(params, const char *);
- gg_add_field_to_structure(field_type, name, struct_type_decl);
+ tree pointer_type = TREE_TYPE(base);
+ rectype = TREE_TYPE(pointer_type);
}
- // Note: On 2022-02-18 I removed the call to gg_append_var_decl, which
- // chains the type_decl on the function block. I don't remember why I
- // thought it was necessary. It makes no difference for COBOL compilations.
- //
- // But I must have copied it from a C compilation example.
- //
- // I removed it so that I could create type_decls outside of a function.
- // I know not what the long-term implications might be.
- //
- // You have been served notice.
- //
- // struct_type_decl is the type_decl for our structure. We need to
- // append it to the list of variables in order to use it:
- // The following function call is misnamed. It can take struct type_decls
- //gg_append_var_decl(struct_type_decl);
- }
-
-void
-gg_get_union_type_decl(tree union_type_decl, int count, va_list params)
- {
- while( count-- )
+ else
{
- tree field_type = va_arg(params, tree);
- const char *name = va_arg(params, const char *);
- gg_add_field_to_structure(field_type, name, union_type_decl);
+ // Assuming a struct (or union), pick up the record_type
+ rectype = TREE_TYPE(base);
}
- }
-
-tree
-gg_get_local_struct_type_decl(const char *type_name, int count, ...)
- {
- tree struct_type_decl = gg_start_building_a_struct(type_name, current_function->function_decl);
-
- va_list params;
- va_start(params, count);
-
- gg_get_struct_type_decl(struct_type_decl, count, params);
-
- va_end(params);
-
- // To use the struct_type_decl, you'll need to execute
- // the following to turn it into a var_decl:
- // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl),
- // var_name,
- // vs_static);
- return struct_type_decl;
- }
-
-tree
-gg_get_filelevel_struct_type_decl(const char *type_name, int count, ...)
- {
- tree struct_type_decl = gg_start_building_a_struct(type_name, gg_trans_unit.trans_unit_decl);
-
- va_list params;
- va_start(params, count);
-
- gg_get_struct_type_decl(struct_type_decl, count, params);
-
- va_end(params);
-
- // To use the struct_type_decl, you'll need to execute
- // the following to turn it into a var_decl:
- // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl),
- // var_name,
- // vs_static);
- return struct_type_decl;
- }
-
-tree
-gg_get_filelevel_union_type_decl(const char *type_name, int count, ...)
- {
- tree struct_type_decl = gg_start_building_a_union(type_name, gg_trans_unit.trans_unit_decl);
-
- va_list params;
- va_start(params, count);
-
- gg_get_union_type_decl(struct_type_decl, count, params);
-
- va_end(params);
- // To use the struct_type_decl, you'll need to execute
- // the following to turn it into a var_decl:
- // tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl),
- // var_name,
- // vs_static);
- return struct_type_decl;
- }
+ tree id_of_field = get_identifier(field_name);
-tree
-gg_define_local_struct(const char *type_name, const char * var_name, int count, ...)
- {
- // Builds a structure, declares it as a static variable in the current function,
- // and returns the var_decl for it.
- tree struct_type_decl = gg_start_building_a_struct(type_name, current_function->function_decl);
+ tree field_decl = NULL_TREE;
- va_list params;
- va_start(params, count);
+ tree next_value = TYPE_FIELDS(rectype);
- gg_get_struct_type_decl(struct_type_decl, count, params);
+ // Look through the chain of fields for a match to ours. This is, in the
+ // limit, an O(N^2) computational burden. But structures usually small, so we
+ // probably don't have to figure out how to make it faster.
+ while( next_value )
+ {
+ if( DECL_NAME(next_value) == id_of_field )
+ {
+ field_decl = next_value;
+ break;
+ }
+ next_value = TREE_CHAIN(next_value);
+ }
- va_end(params);
- // We now have a complete struct_type_decl, whose TREE_TYPE is the
- // the type we need when declaring it.
+ if( !field_decl )
+ {
+ cbl_internal_error("Somebody asked for the field %s.%s, which does not exist",
+ IDENTIFIER_POINTER(DECL_NAME(base)),
+ field_name);
+ }
- // And with that done, we can actually define the storage:
- tree var_decl = gg_define_variable( TREE_TYPE(struct_type_decl),
- var_name,
- vs_static);
- return var_decl;
+ return field_decl;
}
tree
return retval;
}
-tree
-gg_assign_to_structure(tree var_decl_struct, const char *field, const tree source)
- {
- // The C equivalent: "struct.field = source"
- tree component_ref = gg_struct_field_ref(var_decl_struct,field);
- gg_assign(component_ref,source);
- return component_ref;
- }
-
-tree
-gg_assign_to_structure(tree var_decl_struct, const char *field, int N)
- {
- // The C equivalent: "struct.field = N"
- tree component_ref = gg_struct_field_ref(var_decl_struct,field);
- gg_assign(component_ref,build_int_cst(integer_type_node, N));
- return component_ref;
- }
-
static tree
gg_create_assembler_name(const char *cobol_name)
{
}
extern tree
-gg_define_array(tree type_decl, const char *name, size_t size, gg_variable_scope_t scope)
+gg_define_array(tree type_decl,
+ const char *name,
+ size_t size,
+ gg_variable_scope_t scope)
{
tree array_type = build_array_type_nelts(type_decl, size);
return gg_define_variable(array_type, name, scope);
// In order to do that, this fellow's "addressable" bit has to be on, otherwise
// the GIMPLE reducer creates a temporary variable, sets its value to var_decl's,
- // and returns the pointer to the temp. I suppose this has something to do with
- // pass by reference and pass by value, but it makes my head hurt, and, frankly,
- // I'll take the dangerous road.
+ // and returns the pointer to the temp.
+
+ tree type = TREE_TYPE (var_decl);
+ if( TREE_CODE (type) == ARRAY_TYPE )
+ {
+ cbl_internal_error("%s:%d: Must not call here with %s",
+ __func__,
+ __LINE__,
+ "ARRAY_TYPE");
+ }
TREE_ADDRESSABLE(var_decl) = 1;
TREE_USED(var_decl) = 1;
- return build1( ADDR_EXPR,
- build_pointer_type (TREE_TYPE(var_decl)),
- var_decl);
+ return build_fold_addr_expr(var_decl);
+ }
+
+tree
+gg_pointer_to_array(tree expr)
+ {
+ tree type = TREE_TYPE (expr);
+
+ if (TREE_CODE (type) != ARRAY_TYPE)
+ {
+ cbl_internal_error("%s:%d: Must not call here with non-%s",
+ __func__,
+ __LINE__,
+ "ARRAY_TYPE");
+ }
+
+ /* Arrays: produce &(expr[lower_bound]), i.e. pointer to first element,
+ not &expr, which would be pointer-to-array. */
+ tree domain = TYPE_DOMAIN (type);
+ tree idx_type = domain ? TREE_TYPE (domain) : integer_type_node;
+ tree first_idx =
+ (domain && TYPE_MIN_VALUE (domain))
+ ? TYPE_MIN_VALUE (domain)
+ : build_int_cst (idx_type, 0);
+
+ tree elem_ref = build4 (ARRAY_REF,
+ TREE_TYPE (type), /* element type */
+ expr,
+ first_idx,
+ NULL_TREE,
+ NULL_TREE);
+
+ return build_fold_addr_expr (elem_ref);
}
tree
*label_expr = build1(LABEL_EXPR, void_type_node, label_decl);
}
-// Used for implementing SECTIONS and PARAGRAPHS. When you have a
-// void *pointer = &&label, gg_goto is the same as
-// goto *pointer
void
gg_goto(tree var_decl_pointer)
{
+ // This routine takes a label_decl_node, and creates a GOTO expression to it.
+ // Currently it is unused, and one should be very wary of using it. I used
+ // to use it for implementing things like computed gotos, and pseudo-returns
+ // from PERFORMs. The trouble is that it leads to explosions in the Control
+ // Flow Graph, because the middle end basically has to assume that a
+ // JMP *PTR could reference any of all the symbols in the program. So, when
+ // I did that, when any PERFORM returned through a JMP *PTR, it led to
+ // O(M*N) behavior, where M was the number of performs and N was the number
+ // of paragraph and section procedures.
+
+ // To speed things up, I learned how to create switch statements.
tree go_to = build1_loc(gg_token_location(),
GOTO_EXPR,
void_type_node,
#define SCHAR signed_char_type_node
#define UCHAR unsigned_char_type_node
#define SHORT short_integer_type_node
+#define SHORT_P build_pointer_type(short_integer_type_node)
#define USHORT short_unsigned_type_node
#define WCHAR short_unsigned_type_node
#define INT integer_type_node
// This structure contains state variables for a single function.
+ bool initialized; // Starts off false; used for one-time initialization
+
const char *our_unmangled_name; // This is the original name
const char *our_name; // This is our mangled name
tree function_address;
size_t our_symbol_table_index;
+ bool has_initial; // The program-id has the INITIAL clause.
+ bool has_recursive; // The program-id has the RECURSIVE clause.
// The function_decl is fundamental to many, many things
tree function_decl;
struct cbl_proc_t *current_section;
struct cbl_proc_t *current_paragraph;
- tree void_star_temp; // At the end of every paragraph and section, we
- // // we need a variable "void *temp" to hold a
- // // label for one instruction. Rather than clutter
- // // up the code with temporaries, we use this one
- // // instance instead.
+ // This carries an indirect pointer reference to RETURN-CODE
+ tree var_decl_return;
tree first_time_through;
// decremented and a return is created. When the counter is 1, the
// EXIT program is treated as a CONTINUE.
tree called_by_main_counter;
+
+ // We used to use indirect jumps to implement "pseudo-return" from PERFORM
+ // <proc> statements. But that led to N-squared complexity in the Control
+ // Flow Graph, because the middle-end can't make assumptions about the
+ // target of the JMP *%rax; as far as the middle-end is concerned *any*
+ // label in the program could be a target.
+ //
+ // We are now reducing the complexity to linear by using a switch()
+ // statement on an identifier. The following map collects the indexes
+ // used for the switch statement.
+
+ // In order to reduce the complexity of the Control Flow Graph, we build a
+ // an array of all paragraphs. For each such paragraph, we also build a
+ // vector of of the return locations of PERFORM statements that target it.
+ // Those tables are used to create one dispatching switch statement per
+ // paragraph. Each switch statements has exactly one CASE for each PERFORM
+ // of the paragraph, each CASE contains a GOTO the return location of that
+ // PERFORM.
+ //
+ // The map uses the paragraph's proc_t * as a key. The payload is the
+ // index into the vector of vectors.
+
+ std::vector<void *> list_of_procedures;
+
+ // The following is an SIZE_T variable node. It is set by every PERFORM
+ // statement to establish where the end-of-paragraph dispatch switch picks
+ // a GOTO statement for the return.
+ tree pseudo_return_index;
+
+ // The ENTRY statement creates alternative entry point to a program-id. We
+ // implement that as a SWITCH_EXPR. At the main entry point for a
+ // program-id, we check to see if an alternative entry point has been
+ // established. If so, we jump to the SWITCH statement which dispatches
+ // execution to the alternate location.
+ tree entry_switch_goto;
+ tree entry_switch_label;
+ std::vector<tree> entry_goto_expressions;
};
struct cbl_translation_unit_t
// struct creation and field access
// Create struct, and access a field in a struct
-extern tree gg_get_local_struct_type_decl(const char *type_name, int count, ...);
-extern tree gg_get_filelevel_struct_type_decl(const char *type_name, int count, ...);
-extern tree gg_get_filelevel_union_type_decl(const char *type_name, int count, ...);
-extern tree gg_define_local_struct(const char *type_name, const char * var_name, int count ,...);
+extern tree gg_get_structure_type_decl(const char *type_name, ...);
+extern void gg_structure_type_constructor(tree record_decl, ...);
+
extern tree gg_find_field_in_struct(const tree var_decl, const char *field_name);
extern tree gg_struct_field_ref(const tree struct_decl, const char *field);
-extern tree gg_assign_to_structure(tree var_decl_struct, const char *field, const tree source);
-extern tree gg_assign_to_structure(tree var_decl_struct, const char *field, int N);
-// Generalized variable declareres. This don't create storage
+// Generalized variable declarer. This doesn't create storage
extern tree gg_declare_variable(tree type_decl,
const char *name=NULL,
tree initial_value=NULL_TREE,
extern tree gg_define_uchar_star(tree var);
extern tree gg_define_uchar_star(const char *variable_name, tree var);
-// address_of operator; equivalent of C "&buffer"
+// address_of operator; equivalent of C "&var_decl"
extern tree gg_get_address_of(const tree var_decl);
+// equivalent of C "&array[0]"
+extern tree gg_pointer_to_array(tree array);
+
// Array creation and access:
extern tree gg_define_array(tree type_decl, size_t size);
tree var_decl_rdigits; // int __gg__rdigits;
tree var_decl_unique_prog_id; // size_t __gg__unique_prog_id;
-tree var_decl_entry_location; // This is for managing ENTRY statements
tree var_decl_exit_address; // This is for implementing pseudo_return_pop
tree var_decl_call_parameter_signature; // char *__gg__call_parameter_signature
tree var_decl_call_parameter_count; // int __gg__call_parameter_count
tree var_decl_call_parameter_lengths; // size_t *__gg__call_parameter_count
-tree var_decl_return_code; // short __gg__data_return_code
-
tree var_decl_arithmetic_rounds_size; // size_t __gg__arithmetic_rounds_size;
tree var_decl_arithmetic_rounds; // int* __gg__arithmetic_rounds;
tree var_decl_fourplet_flags_size; // size_t __gg__fourplet_flags_size;
// Indicates which routine main() called
tree var_decl_main_called; // int __gg__main_called;
-// Indicates the target label for an ENTRY statement
-tree var_decl_entry_label; // void* __gg__entry_label
+// Indicates the target index of an ENTRY statement
+tree var_decl_entry_index; // void* __gg__entry_index
#if 0
#define REFER(a)
extern tree var_decl_rdigits; // int __gg__rdigits;
extern tree var_decl_unique_prog_id; // size_t __gg__unique_prog_id;
-extern tree var_decl_entry_location; // This is for managing ENTRY statements
extern tree var_decl_exit_address; // This is for implementing pseudo_return_pop
extern tree var_decl_call_parameter_signature; // char *__gg__call_parameter_signature
extern tree var_decl_call_parameter_count; // int __gg__call_parameter_count
extern tree var_decl_call_parameter_lengths; // size_t *var_decl_call_parameter_lengths
-extern tree var_decl_return_code; // short __gg__data_return_code
-
extern tree var_decl_arithmetic_rounds_size; // size_t __gg__arithmetic_rounds_size;
extern tree var_decl_arithmetic_rounds; // int* __gg__arithmetic_rounds;
extern tree var_decl_fourplet_flags_size; // size_t __gg__fourplet_flags_size;
extern tree var_decl_treeplet_4s; // SIZE_T_P , "__gg__treeplet_4s"
extern tree var_decl_nop; // int __gg__nop
extern tree var_decl_main_called; // int __gg__main_called
-extern tree var_decl_entry_label; // void* __gg__entry_label
+extern tree var_decl_entry_index; // void* __gg__entry_index
int get_scaled_rdigits(cbl_field_t *field);
int get_scaled_digits(cbl_field_t *field);
}
if( !current.new_program(@name, LblProgram, name,
$program_as.data,
- $attr.common, $attr.initial) ) {
+ $attr.common,
+ $attr.initial,
+ $attr.recursive) ) {
auto L = symbol_program(current_program_index(), name);
assert(L);
error_msg(@name, "PROGRAM-ID %s already defined on line %d",
symbol_table_init();
}
if( !current.new_program(@NAME, LblFunction, $NAME,
- $program_as.data,
- $attr.common, $attr.initial) ) {
+ $program_as.data,
+ $attr.common,
+ $attr.initial,
+ $attr.recursive) ) {
auto e = symbol_function(current_program_index(), $NAME);
auto L = cbl_label_of(e);
error_msg(@NAME, "FUNCTION %s already defined on line %d",
error_msg(@2, "%s may not REDEFINE %s",
field->name, orig->name);
}
- cbl_field_t *super = symbol_redefines(orig);
- if( super ) {
- error_msg(@2, "%s may not REDEFINE %s, "
- "which redefines %s",
- field->name, orig->name, super->name);
- }
- if( field->level != orig->level ) {
+ // Resolve chained REDEFINES:
+ // treat "C REDEFINES B"
+ // with "B REDEFINES A"
+ // as "C" redefining the same storage as "A".
+
+ cbl_field_t *root = symbol_redefines_root(orig);
+ if( field->level != root->level ) {
error_msg(@2, "cannot redefine %s %s as %s %s "
"because they have different levels",
- orig->level_str(), name_of(orig),
+ root->level_str(), name_of(root),
field->level_str(), name_of(field));
}
// ISO 13.18.44.3
- auto parent( symbol_index(e) );
- auto p = std::find_if( symbol_elem_of(orig) + 1,
+ auto parent( symbol_index(symbol_elem_of(root)) );
+ auto p = std::find_if( symbol_elem_of(root) + 1,
symbol_elem_of(field),
[parent, level = field->level]( const auto& elem ) {
if( elem.type == SymField ) {
auto mid( cbl_field_of(p) );
error_msg(@2, "cannot redefine %s %s as %s %s "
"because %s %s intervenes",
- orig->level_str(), name_of(orig),
+ root->level_str(), name_of(root),
field->level_str(), name_of(field),
mid->level_str(), name_of(mid));
}
- if( valid_redefine(@2, field, orig) ) {
+ if( valid_redefine(@2, field, root) ) {
/*
* Defer "inheriting" the parent's description until the
* redefine is complete.
*/
- current_field()->parent = symbol_index(e);
+ current_field()->parent = symbol_index(symbol_elem_of(root));
}
}
;
{
cbl_refer_t& tgt( target.refer );
if( ! valid_target(tgt) ) return false;
-
+#if 0
+ if( field_index(target.refer.field) == return_code_register() ) return true;
+#endif
// Rule 1 c: is valid for VALUE, REPLACING, or DEFAULT
// If no VALUE (category none), set to blank/zero.
if( value_category == data_category_none && replacements.empty() ) {
bool new_program ( const YYLTYPE& loc, cbl_label_type_t type,
const char name[], const char os_name[],
- bool common, bool initial )
+ bool common, bool initial, bool recursive )
{
size_t parent = programs.empty()? 0 : programs.top().program_index;
cbl_label_t label = {};
label.line = yylineno;
label.common = common;
label.initial = initial;
+ label.recursive = recursive;
label.os_name = os_name;
if( !namcpy(loc, label.name, name) ) { gcc_unreachable(); }
bool fOK = symbol_at(programs.top().program_index) + 1 == symbols_end();
assert(fOK);
- if( (L = symbol_program_local(name)) != NULL ) {
+ auto extant = symbol_program_local(name);
+ if( extant && extant != L ) {
error_msg(loc, "program '%s' already defined on line %d",
- L->name, L->line);
+ extant->name, extant->line);
return false;
}
options_paragraph = cbl_options_t();
first_statement = 0;
+ if( programs.size() == 1 ) {
+ symbol_registers_add();
+ }
+
return fOK;
}
* whether to indicate a refmod to the parser with an LPAREN token, or not,
* with a '(' token. The input is known to have a first line that begins with
* '('., includes ':', and ends with ')'.
+ *
+ * Single forward pass: track paren depth, require exactly one ':' at depth 1,
+ * skip quoted regions (doubled quote is escape). Allows arithmetic and
+ * parentheses in the left part, e.g. ((LENGTH OF x/2) - (y/2)) : 1.
*/
static bool
is_refmod( const char input[], const char enput[] ) {
- if( input == enput ) return false;
-
- switch(*input) {
- case '(':
- input = std::find( ++input, enput, ')');
- if( input == enput ) return false;
- return is_refmod(++input, enput);
- case ':':
- return is_refmod(++input, enput);
- case ')':
- if( ++input == enput ) return true;
- return is_refmod(input, enput);
- default:
- if( ISSPACE(*input) ) {
- input = std::find_if( ++input, enput,
- []( char ch ) {
- return ! ISSPACE(ch);
- } );
- return is_refmod(input, enput);
- }
- break;
- }
- input = std::find_if( input, enput,
- [start = *input]( char ch ) {
- bool yes = false;
- if( ISDIGIT(start) ) {
- switch(ch) {
- case '+': case '-': case '*': case '/':
- yes = true; break;
- case '.': case ',':
- yes = true; break;
- default:
- yes = ISDIGIT(ch);
- break;
- }
- } else {
- assert(ISALNUM(start));
- switch(ch) {
- case '-':
- yes = true; break;
- default:
- yes = ISALNUM(ch);
- break;
- }
- }
- return !yes;
- } );
- return is_refmod(input, enput);
+ if( input == enput || *input != '(' ) return false;
+ int depth = 0;
+ bool colon_at_depth1 = false;
+ const char *p = input;
+
+ while( p < enput ) {
+ char ch = *p++;
+ if( ch == '"' || ch == '\'' ) {
+ /* Skip quoted region; doubled quote is escape. */
+ const char quote = ch;
+ while( p < enput ) {
+ ch = *p++;
+ if( ch == quote ) {
+ if( p < enput && *p == quote ) { p++; continue; }
+ break;
+ }
+ }
+ continue;
+ }
+ if( ch == '(' ) {
+ depth++;
+ continue;
+ }
+ if( ch == ')' ) {
+ depth--;
+ if( depth < 0 ) return false;
+ if( depth == 0 ) return colon_at_depth1;
+ continue;
+ }
+ if( ch == ':' && depth == 1 ) {
+ if( colon_at_depth1 ) return false;
+ colon_at_depth1 = true;
+ continue;
+ }
+ }
+ return false;
}
int alphabet; // Same as cbl_field_t::codeset::language
} cblc_field_t;
*/
- tree retval = NULL_TREE;
- retval = gg_get_filelevel_struct_type_decl( "cblc_field_t",
- 17,
- UCHAR_P, "data",
- SIZE_T, "capacity",
- SIZE_T, "allocated",
- SIZE_T, "offset",
- CHAR_P, "name",
- CHAR_P, "picture",
- CHAR_P, "initial",
- CHAR_P, "parent",
- SIZE_T, "occurs_lower",
- SIZE_T, "occurs_upper",
- ULONGLONG, "attr",
- SCHAR, "type",
- SCHAR, "level",
- SCHAR, "digits",
- SCHAR, "rdigits",
- INT, "encoding",
- INT, "alphabet");
- retval = TREE_TYPE(retval);
-
+ tree retval = gg_get_structure_type_decl("cblc_field_t",
+ UCHAR_P, "data",
+ SIZE_T, "capacity",
+ SIZE_T, "allocated",
+ SIZE_T, "offset",
+ CHAR_P, "name",
+ CHAR_P, "picture",
+ CHAR_P, "initial",
+ CHAR_P, "parent",
+ SIZE_T, "occurs_lower",
+ SIZE_T, "occurs_upper",
+ ULONGLONG, "attr",
+ SCHAR, "type",
+ SCHAR, "level",
+ SCHAR, "digits",
+ SCHAR, "rdigits",
+ INT, "encoding",
+ INT, "alphabet",
+ NULL_TREE);
return retval;
}
int dummy // We need an even number of INT
} cblc_file_t;
*/
-
- tree retval = NULL_TREE;
- retval = gg_get_filelevel_struct_type_decl( "cblc_file_t",
- 33,
- CHAR_P, "name",
- ULONGLONG, "symbol_table_index",
- CHAR_P, "filename",
- FILE_P, "file_pointer",
- cblc_field_p_type_node, "default_record",
- SIZE_T, "record_area_min",
- SIZE_T, "record_area_max",
- build_pointer_type(cblc_field_p_type_node), "keys",
- build_pointer_type(INT),"key_numbers",
- build_pointer_type(INT),"uniques",
- cblc_field_p_type_node, "password",
- cblc_field_p_type_node, "status",
- cblc_field_p_type_node, "user_status",
- cblc_field_p_type_node, "vsam_status",
- cblc_field_p_type_node, "record_length",
- VOID_P, "supplemental",
- VOID_P, "implementation",
- SIZE_T, "reserve",
- LONG, "prior_read_location",
- INT, "org",
- INT, "access",
- INT, "mode_char",
- INT, "errnum",
- INT, "io_status",
- INT, "padding",
- UINT, "delimiter",
- INT, "stride",
- INT, "flags",
- UINT, "recent_char",
- INT, "recent_key",
- INT, "prior_op",
- INT, "encoding", // Actually cbl_encoding_t
- INT, "alphabet",
- INT, "dummy");
- retval = TREE_TYPE(retval);
+ tree retval = gg_get_structure_type_decl("cblc_file_t",
+ CHAR_P, "name",
+ ULONGLONG, "symbol_table_index",
+ CHAR_P, "filename",
+ FILE_P, "file_pointer",
+ cblc_field_p_type_node, "default_record",
+ SIZE_T, "record_area_min",
+ SIZE_T, "record_area_max",
+ build_pointer_type(cblc_field_p_type_node), "keys",
+ build_pointer_type(INT),"key_numbers",
+ build_pointer_type(INT),"uniques",
+ cblc_field_p_type_node, "password",
+ cblc_field_p_type_node, "status",
+ cblc_field_p_type_node, "user_status",
+ cblc_field_p_type_node, "vsam_status",
+ cblc_field_p_type_node, "record_length",
+ VOID_P, "supplemental",
+ VOID_P, "implementation",
+ SIZE_T, "reserve",
+ LONG, "prior_read_location",
+ INT, "org",
+ INT, "access",
+ INT, "mode_char",
+ INT, "errnum",
+ INT, "io_status",
+ INT, "padding",
+ UINT, "delimiter",
+ INT, "stride",
+ INT, "flags",
+ UINT, "recent_char",
+ INT, "recent_key",
+ INT, "prior_op",
+ INT, "encoding", // Actually cbl_encoding_t
+ INT, "alphabet",
+ NULL_TREE);
return retval;
}
size_t capacity, nelem;
size_t first_program, procedures;
struct registers_t {
- size_t file_status, linage_counter, return_code,
+ size_t file_status, linage_counter,
exception_condition, very_true, very_false;
registers_t() {
- file_status = linage_counter = return_code =
+ file_status = linage_counter =
exception_condition = very_true = very_false = 0;
}
} registers;
static char decimal_point = '.';
size_t file_status_register() { return symbols.registers.file_status; }
-size_t return_code_register() { return symbols.registers.return_code; }
size_t very_true_register() { return symbols.registers.very_true; }
size_t very_false_register() { return symbols.registers.very_false; }
size_t ec_register() { return symbols.registers.exception_condition; }
+size_t return_code_register() {
+ // Every top-level program has a global return-code register.
+ auto iprog = current_program_index();
+ static const char name[] = "RETURN-CODE";
+ auto found = symbol_find( iprog, std::list<const char*>(1, name) );
+ gcc_assert(found.second);
+ return symbol_index(found.first);
+}
+
cbl_refer_t *
cbl_refer_t::empty() {
static cbl_refer_t empty;
return NULL;
}
+cbl_field_t *
+symbol_redefines_root( const struct cbl_field_t *field ) {
+ cbl_field_t *root = const_cast<cbl_field_t *>(field);
+ cbl_field_t *r;
+ while( (r = symbol_redefines(root)) != NULL )
+ root = r;
+ return root;
+}
+
static cbl_field_t *
symbol_explicitly_redefines( const cbl_field_t *field ) {
auto f = symbol_redefines(field);
}
}
- if( ! field->codeset.consistent() ) {
- if( ! field->codeset.valid() ) {
- switch(field->type) {
- case FldForward:
- case FldInvalid:
- gcc_unreachable();
- case FldAlphaEdited:
- case FldAlphanumeric:
- case FldDisplay:
- case FldGroup:
- case FldLiteralA:
- case FldLiteralN:
- case FldNumericDisplay:
- case FldNumericEdited:
+ // This test is a little too broad, but avoids a special attribute bit for
+ // things like the XML registers. The tests are only internal checks anyway.
+ if( ! (is_numeric(field) ||
+ field->has_attr(register_e) ||
+ field->has_attr(global_e)) ) {
+ if( ! field->codeset.consistent() ) {
+ if( ! field->codeset.valid() ) {
+ switch(field->type) {
+ case FldForward:
+ case FldInvalid:
+ gcc_unreachable();
+ case FldAlphaEdited:
+ case FldAlphanumeric:
+ case FldDisplay:
+ case FldGroup:
+ case FldLiteralA:
+ case FldLiteralN:
+ case FldNumericDisplay:
+ case FldNumericEdited:
+ if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
+ error_msg(symbol_field_location(field_index(field)),
+ "internal: %qs encoding not defined", field->name);
+ }
+ break;
+ case FldClass:
+ case FldConditional:
+ case FldFloat:
+ case FldIndex:
+ case FldNumericBin5:
+ case FldNumericBinary:
+ case FldPacked:
+ case FldPointer:
+ case FldSwitch:
+ break;
+ }
+ } else {
if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
error_msg(symbol_field_location(field_index(field)),
- "internal: %qs encoding not defined", field->name);
+ "internal: %qs encoding %qs inconsistent",
+ field->name,
+ cbl_alphabet_t::encoding_str(field->codeset.encoding) );
}
- break;
- case FldClass:
- case FldConditional:
- case FldFloat:
- case FldIndex:
- case FldNumericBin5:
- case FldNumericBinary:
- case FldPacked:
- case FldPointer:
- case FldSwitch:
- break;
- }
- } else {
- if( ! (field->has_attr(register_e) || field->has_attr(hex_encoded_e)) ) {
- error_msg(symbol_field_location(field_index(field)),
- "internal: %qs encoding %qs inconsistent",
- field->name,
- cbl_alphabet_t::encoding_str(field->codeset.encoding) );
}
}
}
-
assert( ! field->is_typedef() );
if( parsed_ok ) parser_symbol_add(field);
const struct symbol_elem_t *first = symbols.elems + symbols.first_program;
for( ; field->parent == 0 && e >= first; e-- ) {
+ if( e->type == SymDataSection ) {
+ return NULL; // parent cannot be in another section
+ }
if( ! (e->type == SymField && cbl_field_of(e)->level > 0) ) {
continue; // level 0 fields are not user-declared symbols
}
{1,1,0,0, "\"\0\xFF"}, 0, "QUOTES", cp1252 },
{ FldPointer, constq | register_e ,
{8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS", cp1252 },
- // IBM defines TALLY
- // 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO.
- { FldNumericBin5, signable_e | register_e,
- {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY", cp1252 },
// 01 ARGI is the current index into the argv array
{ FldNumericBin5, signable_e | register_e,
{16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI", cp1252 },
static cbl_field_t special_registers[] = {
{ FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS", cp1252 },
{ FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0", cp1252 },
- { FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE", cp1252 },
- { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER", cp1252 },
+ { FldNumericBin5, global_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER", cp1252 },
{ FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin", cp1252 },
{ FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout", cp1252 },
{ FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr", cp1252 },
table.nelem = p - table.elems;
assert(table.nelem < table.capacity);
- const static auto reg_based_any = cbl_field_attr_t(register_e | based_e | any_length_e);
- // xml registers
- static cbl_field_t xml_registers[] = {
- { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-CODE", cp1252 },
- { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT", cp1252 },
- { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION", cp1252 },
- { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE", cp1252 },
- { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE", cp1252 },
- { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX", cp1252 },
- { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX", cp1252 },
- { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-TEXT", cp1252 },
- { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NTEXT", cp1252 },
- }, * const eoxml = xml_registers + COUNT_OF(xml_registers);
-
- assert(table.nelem + COUNT_OF(xml_registers) < table.capacity);
-
- p = table.elems + table.nelem;
- p = std::transform(xml_registers, eoxml, p, elementize);
- table.nelem = p - table.elems;
- assert(table.nelem < table.capacity);
-
// Add any CDF values defined on the command line.
// After symbols are ready, the CDF adds them directly.
const std::list<cbl_field_t> cdf_values = cdf_literalize();
symbols.registers.linage_counter = symbol_index(symbol_field(0,0,
"LINAGE-COUNTER"));
symbols.registers.file_status = symbol_index(symbol_field(0,0, "_FILE_STATUS"));
- symbols.registers.return_code = symbol_index(symbol_field(0,0, "RETURN-CODE"));
symbols.registers.very_true = symbol_index(symbol_field(0,0, "_VERY_TRUE"));
symbols.registers.very_false = symbol_index(symbol_field(0,0, "_VERY_FALSE"));
}
return e;
}
+void
+symbol_registers_add() {
+ /*
+ * awk -F\\t '$5 == "X" {print $1 "\t" $7}' r
+ * IBM per-program "registers" are really implied working storage data items
+ * for top-level programs.
+ */
+ const static cbl_field_t::codeset_t cp1252(CP1252_e);
+ const static auto based_any = cbl_field_attr_t(global_e | based_e | any_length_e);
+ const static auto glosig = cbl_field_attr_t(global_e | signable_e);
+ // The data.initial of these fields is used verbatim by parser_symbol_add.
+ const static char zero[4] = {0};
+ static char spc[160] = " ";
+
+ if( spc[1] != 0x20 ) {
+ std::fill( spc, spc + sizeof(spc), 0x20 );
+ }
+
+ /* In the following table, the FldNumericBin5 initial values are strings with
+ NUL characters in them. That's because this table bypasses the encode_numeric
+ function and the values are passed directly to parser_symbol_add(), which
+ for FldNumericBin5 expects the non-null .initial value to be exactly the
+ memory representation of the run-time variable. */
+
+ static const cbl_field_t ibm_registers[] = {
+#if COBOL_JSON_READY
+ { FldNumericBin5, glosig, {4,4,5,0, zero }, 1, "JSON-CODE", cp1252 },
+ { FldNumericBin5, glosig, {4,4,5,0, zero }, 1, "JSON-STATUS", cp1252 },
+#endif
+ { FldNumericBin5, glosig, {2,2,4,0, zero }, 0, "RETURN-CODE", cp1252 },
+ { FldAlphanumeric, glosig, {160,160,0,0, spc }, 1, "SORT-CONTROL", cp1252 },
+ { FldNumericBin5, glosig, {4,4,5,0, zero }, 1, "SORT-CORE-SIZE", cp1252 },
+ { FldNumericBin5, glosig, {4,4,5,0, zero }, 1, "SORT-FILE-SIZE", cp1252 },
+ { FldAlphanumeric, global_e, {8,8,0,0, spc }, 1, "SORT-MESSAGE", cp1252 },
+ { FldNumericBin5, glosig, {4,4,5,0, zero }, 1, "SORT-MODE-SIZE", cp1252 },
+ { FldNumericBin5, glosig, {4,4,5,0, zero }, 1, "SORT-RETURN", cp1252 },
+ // 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO.
+ { FldNumericBin5, global_e, {4,4,5,0, zero }, 1, "_TALLY", cp1252 },
+ { FldAlphanumeric, global_e, {16,16,0,0, spc }, 1, "WHEN-COMPILED", cp1252 },
+ // xml registers
+ { FldNumericBin5, glosig, {4,4,9,0, zero }, 1, "XML-CODE", cp1252 },
+ { FldAlphanumeric, global_e, {30,30,0,0, spc }, 1, "XML-EVENT", cp1252 },
+ { FldNumericBin5, glosig, {4,4,9,0, zero }, 1, "XML-INFORMATION", cp1252 },
+ { FldAlphanumeric, based_any, {1,1,0,0, nullptr }, 1, "XML-NAMESPACE", cp1252 },
+ { FldAlphanumeric, based_any, {1,1,0,0, nullptr }, 1, "XML-NNAMESPACE", cp1252 },
+ { FldAlphanumeric, based_any, {1,1,0,0, nullptr }, 1, "XML-NAMESPACE-PREFIX", cp1252 },
+ { FldAlphanumeric, based_any, {1,1,0,0, nullptr }, 1, "XML-NNAMESPACE-PREFIX", cp1252 },
+ { FldAlphanumeric, based_any, {1,1,0,0, nullptr }, 1, "XML-TEXT", cp1252 },
+ { FldAlphanumeric, based_any, {1,1,0,0, nullptr }, 1, "XML-NTEXT", cp1252 },
+ };
+
+ size_t program = symbols.nelem - 1;
+ auto e = symbol_at(program);
+ const cbl_label_t *L = cbl_label_of(e);
+ assert(L->type == LblProgram || L->type == LblFunction);
+
+ for( auto field : ibm_registers ) {
+ auto elem = elementize(field);
+ elem.program = program;
+ update_symbol_map2( symbol_append(elem) );
+ }
+}
+
cbl_label_t *
cbl_perform_tgt_t::finally( size_t program ) {
assert(0 < ito);
if( erc == size_t(-1) ) {
if( outbytesleft == 0 ) { // input doesn't fit
gcc_assert(0 < inbytesleft);
- gcc_assert(0 < level);
if( loc.first_line == 0 )
loc = symbol_field_location(field_index(this));
if( type == FldNumericEdited ) {
struct cbl_proc_addresses_t top;
struct cbl_proc_addresses_t exit;
struct cbl_proc_addresses_t bottom;
- tree alter_location; // The altered value if this paragraph is the target of an ALTER
+
+ // The following members implement the return location for a PERFORM to this
+ // procedure. The dispatch_switch_label is where the switch() statement for
+ // this procedure is found; the dispatch_switch_goto is how you get there.
+ // The switch statement itself is made up of GOTO statements built from the
+ // label_decls found in pseudo_return_decls.
+ tree dispatch_switch_goto;
+ tree dispatch_switch_label;
+ std::vector<tree> pseudo_return_decls;
+
+ // The following members do the analogous process for a paragraph that is
+ // the target of an ALTER statement
+ tree alter_switch_goto;
+ tree alter_switch_label;
+ tree no_alter_goto;
+ tree no_alter_label;
+ std::vector<tree> alter_decls;
+ tree alter_index; // The integer index to the switch statement
};
struct cbl_label_addresses_t {
symbol_elem_t * symbols_begin( size_t first = 0 );
symbol_elem_t * symbols_end(void);
cbl_field_t * symbol_redefines( const cbl_field_t *field );
+cbl_field_t * symbol_redefines_root( const cbl_field_t *field );
void build_symbol_map();
bool update_symbol_map( symbol_elem_t *e );
symbol_elem_t * symbol_section_add( size_t program,
cbl_section_t *section );
+void symbol_registers_add();
+
void symbol_field_location( size_t ifield, const YYLTYPE& loc );
YYLTYPE symbol_field_location( size_t ifield );
cbl_field_t *new_alphanumeric(const cbl_name_t name=nullptr,
cbl_encoding_t encoding=no_encoding_e );
+
+// ENABLE_HIJACKING allows for code generation to be "hijacked" when the
+// program-id is "dubner" or "hijack". See the mainline code in genapi.cc.
+
+// To enable hijacking, use
+//
+// make ... CPPFLAGS=-DENABLE_HIJACKING
+//
+// taking care to recaptulate whatever CPPFLAGS were set when configure was
+// run.
+
#endif
data = build_real(float128_type_node, value);
// Turn that back into a REAL_VALUE_TYPE with
// REAL_VALUE_TYPE readback_value = TREE_REAL_CST(data.etc.value);
-
-#define FOR_JIM 0
-#if FOR_JIM
- {
- // When you know data.etc.value was created with build_real()
- enum tree_code code = TREE_CODE(TREE_TYPE(data.etc.value));
- // code will be REAL_TYPE
-
- REAL_VALUE_TYPE readback_value = TREE_REAL_CST(data.etc.value);
- char ach[48];
- size_t number_of_digits = 33;
- bool crop_trailing_zeroes = true;
- real_to_decimal(ach,
- &readback_value,
- sizeof(ach),
- number_of_digits,
- crop_trailing_zeroes);
- fprintf(stderr, "FOR_JIM: %s real_value: %s\n", get_tree_code_name(code), ach);
- }
-#endif
-
unsigned char *retval =
static_cast<unsigned char *>(xmalloc(data.capacity()));
assert(retval);
data = wide_int_to_tree(intTI_type_node, value);
// turn that back into a FIXED_WIDE_INT with
// wi::tree_to_wide_ref iii = wi::to_wide( data.etc.value );
-
-#if FOR_JIM
- {
- // When you know data.etc.value was created with wide_int_to_tree.
- enum tree_code code = TREE_CODE(TREE_TYPE(data.etc.value));
- // code will be INTEGER_TYPE
-
- wi::tree_to_wide_ref iii = wi::to_wide( data.etc.value );
- char ach[60];
- print_dec(iii, ach, SIGNED);
- fprintf(stderr, "FOR_JIM: %s fixed_value: %s\n", get_tree_code_name(code), ach);
- }
-#endif
-
if( data.capacity() == 0 )
{
// It falls to us to establish these parameters:
constants.cc \
gfileio.cc \
gmath.cc \
+ inspect.cc \
intrinsic.cc \
io.cc \
libgcobol.cc \
am__dirstamp = $(am__leading_dot)dirstamp
@BUILD_LIBGCOBOL_TRUE@am_libgcobol_la_OBJECTS = charmaps.lo \
@BUILD_LIBGCOBOL_TRUE@ constants.lo gfileio.lo gmath.lo \
-@BUILD_LIBGCOBOL_TRUE@ intrinsic.lo io.lo libgcobol.lo \
-@BUILD_LIBGCOBOL_TRUE@ posix/shim/errno.lo \
+@BUILD_LIBGCOBOL_TRUE@ inspect.lo intrinsic.lo io.lo \
+@BUILD_LIBGCOBOL_TRUE@ libgcobol.lo posix/shim/errno.lo \
@BUILD_LIBGCOBOL_TRUE@ posix/shim/localtime.lo \
@BUILD_LIBGCOBOL_TRUE@ posix/shim/open.lo posix/shim/stat.lo \
@BUILD_LIBGCOBOL_TRUE@ stringbin.lo valconv.lo xmlparse.lo
@BUILD_LIBGCOBOL_TRUE@ constants.cc \
@BUILD_LIBGCOBOL_TRUE@ gfileio.cc \
@BUILD_LIBGCOBOL_TRUE@ gmath.cc \
+@BUILD_LIBGCOBOL_TRUE@ inspect.cc \
@BUILD_LIBGCOBOL_TRUE@ intrinsic.cc \
@BUILD_LIBGCOBOL_TRUE@ io.cc \
@BUILD_LIBGCOBOL_TRUE@ libgcobol.cc \
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/constants.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gfileio.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmath.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/inspect.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intrinsic.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/io.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgcobol.Plo@am__quote@
extern unsigned char __gg__data_high_values[1] ;
extern unsigned char __gg__data_quotes[1] ;
extern unsigned char __gg__data_upsi_0[2] ;
-extern short __gg__data_return_code ;
// These are the various hardcoded tables used for conversions.
extern const unsigned short __gg__one_to_one_values[256];
};
-unsigned char __gg__data_linage_counter[2] = {0,0};
-struct cblc_field_t __ggsr___14_linage_counter6 = {
- .data = __gg__data_linage_counter ,
- .capacity = 2 ,
- .allocated = 2 ,
- .offset = 0 ,
- .name = "LINAGE-COUNTER" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = 0x0 ,
- .type = FldNumericBin5 ,
- .level = 0 ,
- .digits = 4 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-
unsigned char __gg__data_upsi_0[2] = {0,0};
struct cblc_field_t __ggsr__upsi_0 = {
.data = __gg__data_upsi_0 ,
.alphabet = 0 ,
};
-short __gg__data_return_code = 0;
-struct cblc_field_t __ggsr__return_code = {
- .data = (unsigned char *)&__gg__data_return_code ,
- .capacity = 2 ,
- .allocated = 2 ,
- .offset = 0 ,
- .name = "RETURN-CODE" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = signable_e ,
- .type = FldNumericBin5 ,
- .level = 0 ,
- .digits = 4 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
unsigned char __gg___data_dev_stdin[] = "/dev/stdin";
struct cblc_field_t __ggsr___dev_stdin = {
.data = __gg___data_dev_stdin ,
.alphabet = 0 ,
};
-unsigned char __gg__data_tally[] = {0,0};
-struct cblc_field_t __ggsr___tally = {
- .data = __gg__data_tally ,
- .capacity = 4 ,
- .allocated = 4 ,
- .offset = 0 ,
- .name = "_TALLY" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = global_e ,
- .type = FldNumericBin5 ,
- .level = 0 ,
- .digits = 5 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
unsigned char __gg__data_argi[] = {0,0};
struct cblc_field_t __ggsr__argi = {
.data = __gg__data_argi ,
.alphabet = 0 ,
};
-/*
- * Special registers used by the XML parser
- */
-// XML-CODE PICTURE S9(9) USAGE BINARY VALUE ZERO *> status of XML event
-static int __gg__data_xml_code = 0;
-struct cblc_field_t __ggsr__xml_code = {
- .data = reinterpret_cast<unsigned char*>(&__gg__data_xml_code),
- .capacity = 4 ,
- .allocated = 4 ,
- .offset = 0 ,
- .name = "XML-CODE" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e,
- .type = FldNumericBin5 ,
- .level = 0 ,
- .digits = 9 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-EVENT PICTURE X(30) USAGE DISPLAY VALUE SPACE *> name of XML event
-static unsigned char __gg__data_xml_event[30];
-struct cblc_field_t __ggsr__xml_event = {
- .data = __gg__data_xml_event,
- .capacity = 30 ,
- .allocated = 30 ,
- .offset = 0 ,
- .name = "XML-EVENT" ,
- .picture = "" ,
- .initial = NULL,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e ,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-INFORMATION PICTURE S9(9) USAGE BINARY VALUE ZERO
-static int __gg__data_xml_information = 0;
-struct cblc_field_t __ggsr__xml_information = {
- .data = reinterpret_cast<unsigned char*>(&__gg__data_xml_information),
- .capacity = 4 ,
- .allocated = 4 ,
- .offset = 0 ,
- .name = "XML-INFORMATION" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e,
- .type = FldNumericBin5 ,
- .level = 0 ,
- .digits = 9 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-NAMESPACE Variable-length based alphanumeric item
-struct cblc_field_t __ggsr__xml_namespace = {
- .data = nullptr ,
- .capacity = 1 ,
- .allocated = 1 ,
- .offset = 0 ,
- .name = "XML-NAMESPACE" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e | based_e | any_length_e,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-NNAMESPACE Variable-length national item
-struct cblc_field_t __ggsr__xml_nnamespace = {
- .data = nullptr ,
- .capacity = 1 ,
- .allocated = 1 ,
- .offset = 0 ,
- .name = "XML-NNAMESPACE" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e | based_e | any_length_e,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-NAMESPACE-PREFIX Variable-length based alphanumeric item
-struct cblc_field_t __ggsr__xml_namespace_prefix = {
- .data = nullptr ,
- .capacity = 1 ,
- .allocated = 1 ,
- .offset = 0 ,
- .name = "XML-NAMESPACE-PREFIX" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e | based_e | any_length_e,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-NNAMESPACE_PREFIX Variable-length national item
-struct cblc_field_t __ggsr__xml_nnamespace_prefix = {
- .data = nullptr ,
- .capacity = 1 ,
- .allocated = 1 ,
- .offset = 0 ,
- .name = "XML-NNAMESPACE-PREFIX" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e | based_e | any_length_e,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-TEXT Variable-length based alphanumeric item
-struct cblc_field_t __ggsr__xml_text = {
- .data = nullptr ,
- .capacity = 1 ,
- .allocated = 1 ,
- .offset = 0 ,
- .name = "XML-TEXT" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e | based_e | any_length_e ,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
-
-// XML-NTEXT Variable-length national item
-struct cblc_field_t __ggsr__xml_ntext = {
- .data = nullptr ,
- .capacity = 1 ,
- .allocated = 1 ,
- .offset = 0 ,
- .name = "XML-NTEXT" ,
- .picture = "" ,
- .initial = "" ,
- .parent = NULL,
- .occurs_lower = 0 ,
- .occurs_upper = 0 ,
- .attr = register_e | based_e | any_length_e,
- .type = FldAlphanumeric ,
- .level = 0 ,
- .digits = 0 ,
- .rdigits = 0 ,
- .encoding = iconv_CP1252_e ,
- .alphabet = 0 ,
- };
/* The following defines storage for the global DEBUG-ITEM:
cblc_file_prior_op_t prior_op; // run-time type is INT
cbl_encoding_t encoding; // We assume size int
int alphabet; // Actually cbl_encoding_t
- int dummy;
} cblc_file_t;
--- /dev/null
+/*
+ * Copyright (c) 2021-2026 Symas Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the Symas Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <algorithm>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <set>
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <cwctype>
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <err.h>
+#include <fcntl.h>
+#include <fenv.h>
+#include <math.h> // required for fpclassify(3), not in cmath
+#include <setjmp.h>
+#include <signal.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <stdarg.h>
+#if __has_include(<errno.h>)
+# include <errno.h> // for program_invocation_short_name
+#endif
+#include <langinfo.h>
+
+#include "config.h"
+#include "libgcobol-fp.h"
+
+#include "ec.h"
+#include "common-defs.h"
+#include "io.h"
+#include "gcobolio.h"
+#include "libgcobol.h"
+#include "gfileio.h"
+#include "charmaps.h"
+#include "valconv.h"
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <execinfo.h>
+#include "exceptl.h"
+#include "stringbin.h"
+
+#define NO_RDIGITS (0)
+
+typedef std::vector<cbl_char_t>::const_iterator char_it_c ;
+typedef std::vector<cbl_char_t>::iterator char_it ;
+
+static const char *
+funky_find( const char *piece,
+ const char *piece_end,
+ const char *whole,
+ const char *whole_end )
+ {
+ const char *retval = NULL;
+
+ size_t length_of_piece = piece_end - piece;
+ if(length_of_piece == 0)
+ {
+ __gg__abort("funky_find() length_of_piece shouldn't be zero");
+ }
+
+ whole_end -= length_of_piece;
+
+ while( whole <= whole_end )
+ {
+ if( memcmp( piece, whole, length_of_piece) == 0 )
+ {
+ retval = whole;
+ break;
+ }
+ whole += 1;
+ }
+ return retval;
+ }
+
+static char_it_c
+funky_find_wide( char_it_c needle,
+ char_it_c needle_end, // Actually end+1
+ char_it_c haystack,
+ char_it_c haystack_end, // Actually end+1
+ char_it_c notfound)
+ {
+ // We are looking for the needle in the haystack
+
+ char_it_c retval = notfound;
+
+ size_t length_of_piece = needle_end - needle;
+ if(length_of_piece == 0)
+ {
+ __gg__abort("funky_find_wide() length_of_piece shouldn't be zero");
+ }
+
+ haystack_end -= length_of_piece;
+
+ while( haystack <= haystack_end )
+ {
+ // Compare the memory at needle to the memory at haystack
+ if( memcmp( &(*needle),
+ &(*haystack),
+ length_of_piece*sizeof(cbl_char_t)) == 0 )
+ {
+ // They are the same; return where needle was found
+ retval = haystack;
+ break;
+ }
+ // Not found; move to the next location in the haystach
+ haystack += 1;
+ }
+ return retval;
+ }
+
+static const char *
+funky_find_backward(const char *piece,
+ const char *piece_end,
+ const char *whole,
+ const char *whole_end )
+ {
+ const char *retval = NULL;
+
+ size_t length_of_piece = piece_end - piece;
+ if(length_of_piece == 0)
+ {
+ __gg__abort("funky_find_backward() length_of_piece shouldn't be zero");
+ }
+
+ whole_end -= length_of_piece;
+
+ while( whole <= whole_end )
+ {
+ if( memcmp( piece, whole_end, length_of_piece) == 0 )
+ {
+ retval = whole_end;
+ break;
+ }
+ whole_end -= 1;
+ }
+ return retval;
+ }
+
+static char_it_c
+funky_find_wide_backward( char_it_c needle,
+ char_it_c needle_end, // Actually end+1
+ char_it_c haystack,
+ char_it_c haystack_end, // Actually end+1
+ char_it_c notfound)
+ {
+ // We are looking for the needle in the haystack
+
+ char_it_c retval = notfound;
+
+ size_t length_of_piece = needle_end - needle;
+ if(length_of_piece == 0)
+ {
+ __gg__abort("funky_find_wide_backward() length_of_piece shouldn't be zero");
+ }
+
+ haystack_end -= length_of_piece;
+
+ while( haystack <= haystack_end )
+ {
+ if( memcmp( &(*needle),
+ &(*haystack_end),
+ length_of_piece*sizeof(cbl_char_t)) == 0 )
+ {
+ // They are the same; return where needle was found
+ retval = haystack_end;
+ break;
+ }
+ // Not found; move to the next location in the haystack
+ haystack_end -= 1;
+ }
+ return retval;
+ }
+
+typedef struct normalized_operand
+ {
+ // These are the characters of the string. When the field is NumericDisplay
+ // any leading or trailing +/- characters are removed, and any embedded
+ // minus bits are removed.
+
+ // In order for INSPECT to handle things like UTF-8, which often has
+ // multi-byte codepoints, and UTF-16, which sometimes has multi-pair
+ // codepoints we are going to convert everything to UTF-32 for internal
+ // calculations and searches.
+ std::string the_characters;
+ std::vector<cbl_char_t>the_vectorxxxx;
+
+ // offset and length are maintained in characters, not bytes
+ size_t offset; // Usually zero. Increased by one for leading separate sign.
+ size_t length; // Usually the same as the original. But it is one less
+ // // than the original when there is a trailing separate sign.
+ } normalized_operand;
+
+typedef struct comparand
+ {
+ size_t id_2_index;
+ cbl_inspect_bound_t operation;
+ normalized_operand identifier_3; // The thing to be found
+ normalized_operand identifier_5; // The replacement, for FORMAT 2
+ const char *alpha; // The start location within normalized_id_1
+ const char *omega; // The end+1 location within normalized_id_1
+ char_it_c alpha_it; // The start location within normalized_id_1
+ char_it_c omega_it; // The end+1 location within normalized_id_1
+ size_t leading_count;
+ bool leading;
+ bool first;
+ } comparand;
+
+typedef struct comparand_sbc
+ {
+ size_t id_2_index;
+ cbl_inspect_bound_t operation;
+ std::string identifier_3; // The thing to be found
+//q std::string identifier_5; // The replacement, for FORMAT 2
+ size_t alpha; // The start location within normalized_id_1
+ size_t omega; // The end+1 location within normalized_id_1
+ size_t leading_count;
+ bool leading;
+ bool first;
+ } comparand_sbc;
+
+typedef struct id_2_result
+ {
+ cblc_field_t *id2;
+ size_t id2_o;
+ size_t id2_s;
+ size_t result;
+ } id_2_result;
+
+static normalized_operand
+normalize_id( const cblc_field_t *field,
+ size_t field_o,
+ size_t field_s,
+ cbl_encoding_t encoding )
+ {
+ normalized_operand retval;
+
+ if( field )
+ {
+ charmap_t *charmap = __gg__get_charmap(encoding);
+
+ // This is the old-style byte-based assumption
+ const unsigned char *data = field->data + field_o;
+ cbl_figconst_t figconst
+ = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+
+ retval.offset = 0;
+ retval.length = field_s;
+
+ if( field->type == FldNumericDisplay )
+ {
+ // The value is NumericDisplay.
+ if( field->attr & separate_e )
+ {
+ // Because the sign is a separate plus or minus, the length
+ // gets reduced by one:
+ retval.length = field_s - 1;
+ if( field->attr & leading_e )
+ {
+ // Because the sign character is LEADING, we increase the
+ // offset by one
+ retval.offset = 1;
+ }
+ }
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
+ {
+ // Because we are dealing with a NumericDisplay that might have
+ // the minus bit turned on, we will to mask it off as we copy the
+ // input characters over to retval:
+ retval.the_characters += charmap->set_digit_negative(data[i], false);
+ }
+ }
+ else
+ {
+ // We are set up to create the_characters;
+ if( figconst == normal_value_e )
+ {
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
+ {
+ retval.the_characters += data[i];
+ }
+ }
+ else
+ {
+ char ch = charmap->figconst_character(figconst);
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
+ {
+ retval.the_characters += ch;
+ }
+ }
+ }
+ }
+ else
+ {
+ // There is no field, so leave the_characters empty.
+ retval.offset = 0;
+ retval.length = 0;
+ }
+
+ if( field )
+ {
+ cbl_encoding_t source_encoding = field->encoding;
+ const charmap_t *charmap_source = __gg__get_charmap(source_encoding);
+ charmap_t *charmap = __gg__get_charmap(encoding);
+ int stride = charmap->stride();
+
+ const unsigned char *data = field->data + field_o;
+ cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+ if( figconst == normal_value_e )
+ {
+ retval.offset = 0;
+ retval.length = field_s / stride;
+
+ if( field->type == FldNumericDisplay )
+ {
+ // The value is NumericDisplay, so we might need to adjust the offset
+ // and length:
+ if( field->attr & separate_e )
+ {
+ // Because the sign is a separate plus or minus, the length
+ // gets reduced by one:
+ retval.length = field_s - 1;
+ if( field->attr & leading_e )
+ {
+ // Because the sign character is LEADING, we increase the
+ // offset by one
+ retval.offset = 1;
+ }
+ }
+ }
+ // We are ready to convert from the input to UTF32
+ size_t converted_characters;
+ const char *converted = __gg__iconverter(source_encoding,
+ DEFAULT_32_ENCODING,
+ data+retval.offset * stride,
+ retval.length * stride,
+ &converted_characters);
+ // We are ready to copy the characters over:
+ for( size_t i=0; i<converted_characters; i+=width_of_utf32 )
+ {
+ // Because we are dealing with a NumericDisplay that might have
+ // the minus bit turned on, we will to mask it off as we copy the
+ // input characters over to retval:
+ cbl_char_t ch = charmap->getch(converted, i);
+ if( field->type == FldNumericDisplay )
+ {
+ if( charmap_source->is_like_ebcdic() )
+ {
+ // In EBCDIC, a flagged negative digit 0xF0 through 0xF9 becomes
+ // 0xD0 through 0xD9. Those represent the characters
+ // "}JKLMNOPQR", which, now that we are in UTF32 space, don't have
+ // the right bit pattern to be fixed with set_digit_negative().
+ // So, we fix it separately with this table: Note that location
+ // 0x7D, which is ASCII '{', becomes 0x30 '0'. See also that
+ // locations 0x4A through 0x52 become 0x31 through 0x39.
+ static const uint8_t fixit[256] =
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x81, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x82, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x83, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x84, 0x49, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
+ 0x37, 0x38, 0x39, 0x53, 0x54, 0x55, 0x56, 0x57, 0x85, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x86, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x87, 0x79, 0x7a, 0x7b, 0x7c, 0x30, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x89, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0x8a, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0x8b, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0x8c, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0x8d, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0x8e, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x8f, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+ };
+ ch = fixit[ch & 0xFF];
+ }
+ else
+ {
+ ch = charmap->set_digit_negative(ch, false);
+ }
+ }
+ retval.the_vectorxxxx.push_back(ch);
+ }
+ }
+ else
+ {
+ // We need to fill the field with a figurative constant:
+ // We are set up to create the_characters;
+ charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+ char ch = charmap32->figconst_character(figconst);
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
+ {
+ retval.the_characters += ch;
+ retval.the_vectorxxxx.push_back(ch);
+ }
+ }
+ }
+ else
+ {
+ // There is no field, so leave the_characters empty.
+ retval.offset = 0;
+ retval.length = 0;
+ }
+
+ return retval;
+ }
+
+static std::string
+normalize_id_sbc( const cblc_field_t *field,
+ size_t field_o,
+ size_t field_s,
+ cbl_encoding_t encoding )
+ {
+ // We know that the field is ASCII or EBCDIC
+ std::string retval;
+
+ if( field && field_s )
+ {
+ charmap_t *charmap = __gg__get_charmap(encoding);
+
+ const unsigned char *data = field->data + field_o;
+ cbl_figconst_t figconst
+ = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+
+ if( field->type == FldNumericDisplay )
+ {
+ // The value is NumericDisplay.
+ if( field->attr & separate_e )
+ {
+ // Because the sign is a separate plus or minus, the length
+ // gets reduced by one:
+ field_s -= 1;
+ if( field->attr & leading_e )
+ {
+ // Because the sign character is LEADING, we increase the
+ // offset by one
+ data += 1;
+ }
+ }
+ // At this point, the bytes start at data, and there are field_s of them.
+ retval.assign(reinterpret_cast<const char *>(data), field_s);
+ if( field->attr & signable_e )
+ {
+ if( field->attr & leading_e )
+ {
+ // The sign might be in the first byte; get rid of it
+ retval[0] = charmap->set_digit_negative(data[0], false);
+ }
+ else
+ {
+ // The sign might be in the last byte; get rid of it
+ retval[0] = charmap->set_digit_negative(data[field_s-1], false);
+ }
+ }
+ }
+ else
+ {
+ // We aren't dealing with numeric-display, so
+ if( figconst == normal_value_e )
+ {
+ retval.assign(reinterpret_cast<const char *>(data), field_s);
+ }
+ else
+ {
+ // This field is flagged as figconst
+ char ch = charmap->figconst_character(figconst);
+ retval.assign(field_s, ch);
+ }
+ }
+ }
+ else
+ {
+ // There is no field, so leave retval empty
+ }
+
+ return retval;
+ }
+
+static void
+match_lengths( normalized_operand &id_target,
+ const normalized_operand &id_source)
+ {
+ // This routine gets called when id_source is a figurative constant and
+ // we need the target to be the same length as the source
+
+ char ch = id_target.the_characters[0];
+ id_target.the_characters.clear();
+ for(size_t i=0; i<id_source.length; i++)
+ {
+ id_target.the_characters += ch;
+ }
+
+ cbl_char_t wch = id_target.the_vectorxxxx[0];
+ id_target.the_vectorxxxx.clear();
+ for(size_t i=0; i<id_source.length; i++)
+ {
+ id_target.the_vectorxxxx.push_back(wch);
+ }
+ id_target.length = id_source.length;
+ }
+
+static void
+the_alpha_and_omega(const normalized_operand &id_before,
+ const normalized_operand &id_after,
+ const char * &alpha,
+ const char * &omega,
+ char_it_c &alpha_it,
+ char_it_c &omega_it,
+ char_it_c notfound)
+ {
+ /* The 2023 ISO description of the AFTER and BEFORE phrases of the INSPECT
+ statement is, in a word, garbled.
+
+ IBM's COBOL for Linux 1.2 is a little better, but still a bit confusing
+ because the description for AFTER neglects to specifically state that
+ the scan starts one character to the right of the *first* occurrence of
+ the AFTER value.
+
+ Micro Focus 9.2.5 has the advantage of being ungarbled, succinct, and
+ unambiguous.
+
+ The BEFORE phrase modifies the character position to use as the rightmost
+ position in source for the corresponding comparison operation. Comparisons
+ in source occur only to the left of the first occurrence of delimiter. If
+ delimiter is not present in source, then the comparison proceeds as if
+ there were no BEFORE phrase.
+
+ The AFTER phrase modifies the character position to use as the leftmost
+ position in source for the corresponding comparison operation. Comparisons
+ in source occur only to the right of the first occurrence of delimiter.
+ This character position is the one immediately to the right of the
+ rightmost character of the delimiter found. If delimiter is not found in
+ source, the INSPECT statement has no effect (no tallying or replacement
+ occurs).
+
+ "xyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
+ ^ ^
+ | |
+ | |-- omega
+ ----------------alpha
+ */
+
+ if( id_before.length )
+ {
+ // This is the BEFORE delimiter. We look for the first occurrence of that
+ // delimiter starting at the left of id_1
+
+ const char *start = id_before.the_characters.c_str();
+ const char *end = start + id_before.length;
+ const char *found = funky_find(start, end, alpha, omega);
+ if( found )
+ {
+ // We found id_before within alpha/omega, so reduce omega
+ // to the found location.
+ omega = found;
+ // If not found, we just leave omega alone.
+ }
+
+ char_it_c omega_found = funky_find_wide(id_before.the_vectorxxxx.begin(),
+ id_before.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound )
+ {
+ // We found id_before within alpha/omega, so reduce omega
+ // to the found location.
+ omega_it = omega_found;
+ }
+ }
+
+ if( id_after.length )
+ {
+ // This is the AFTER delimiter. We look for the first occurrence of that
+ // delimiter in id_1
+
+ const char *start = id_after.the_characters.c_str();
+ const char *end = start + id_after.length;
+ const char *found = funky_find(start, end, alpha, omega);
+ if( found )
+ {
+ // We found id_after in the alpha/omega segment. We update alpha
+ // be the character after the id_after substring.
+ alpha = found + (end-start);
+ }
+ else
+ {
+ // We didn't find the id_after string, so we set the alpha to be
+ // omega. That means that no tally or replace operation will take
+ // because no characters will qualify.
+ alpha = omega;
+ }
+
+ char_it_c omega_found = funky_find_wide(id_after.the_vectorxxxx.begin(),
+ id_after.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound)
+ {
+ // We found id_after in the alpha/omega segment. We update alpha
+ // be the character after the id_after substring.
+ alpha_it = omega_found + (end-start);
+ }
+ else
+ {
+ // We didn't find the id_after string, so we set the alpha to be
+ // omega. That means that no tally or replace operation will take
+ // because no characters will qualify.
+ alpha_it = omega_it;
+ }
+ }
+ }
+
+static void
+the_alpha_and_omega_sbc(const std::string &id_before,
+ const std::string &id_after,
+ const std::string &haystack,
+ size_t &alpha,
+ size_t &omega)
+ {
+ /* The 2023 ISO description of the AFTER and BEFORE phrases of the INSPECT
+ statement is, in a word, garbled.
+
+ IBM's COBOL for Linux 1.2 is a little better, but still a bit confusing
+ because the description for AFTER neglects to specifically state that
+ the scan starts one character to the right of the *first* occurrence of
+ the AFTER value.
+
+ Micro Focus 9.2.5 has the advantage of being ungarbled, succinct, and
+ unambiguous.
+
+ The BEFORE phrase modifies the character position to use as the rightmost
+ position in source for the corresponding comparison operation. Comparisons
+ in source occur only to the left of the first occurrence of delimiter. If
+ delimiter is not present in source, then the comparison proceeds as if
+ there were no BEFORE phrase.
+
+ The AFTER phrase modifies the character position to use as the leftmost
+ position in source for the corresponding comparison operation. Comparisons
+ in source occur only to the right of the first occurrence of delimiter.
+ This character position is the one immediately to the right of the
+ rightmost character of the delimiter found. If delimiter is not found in
+ source, the INSPECT statement has no effect (no tallying or replacement
+ occurs).
+
+ "xyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
+ ^ ^
+ | |
+ | |-- omega
+ ----------------alpha
+ */
+
+ if( id_before.length() )
+ {
+ // Look for BEFORE in the haystack.
+ omega = haystack.find(id_before);
+ if( omega == std::string::npos )
+ {
+ // If BEFORE isn't found, we use the whole haystack.
+ omega = haystack.length();
+ }
+ }
+ else
+ {
+ omega = haystack.length();
+ }
+
+ if( id_after.length() )
+ {
+ // This is the AFTER delimiter. We look for the first occurrence of that
+ // delimiter in id_1 that occurs to the left of BEFORE/omega
+
+ alpha = haystack.substr(0, omega).find(id_after);
+ if( alpha == std::string::npos )
+ {
+ // If there is no AFTER to the left of omega, then we can't find anything
+ // in this haystack.
+ alpha = haystack.length();
+ }
+ else
+ {
+ alpha += id_after.length();
+ }
+ }
+ else
+ {
+ alpha = 0;
+ }
+ }
+
+static void
+the_alpha_and_omega_backward( const normalized_operand &id_before,
+ const normalized_operand &id_after,
+ const char * &alpha,
+ const char * &omega,
+ char_it_c &alpha_it,
+ char_it_c &omega_it,
+ char_it_c notfound)
+ {
+ /* Like the_alpha_and_omega(), but for handling BACKWARD.
+
+ "xyzxyzBEFORExyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
+ ^ ^
+ | |
+ | -- omega
+ |--------alpha
+ */
+
+ const char *id_1 = alpha;
+ const char *id_1_end = omega;
+
+ if( id_before.length )
+ {
+ // This is the BEFORE delimiter. We look for the first occurrence of it
+ // from the right end of id_1
+
+ const char *start = id_before.the_characters.c_str();
+ const char *end = start + id_before.length;
+ const char *found = funky_find_backward(start, end, id_1, id_1_end);
+ if( found )
+ {
+ // We found id_before within id_1, so change alpha to the character just
+ // to the right of BEFORE. Otherwise, we will leave alpha alone, so that
+ // it stays at the beginning of id_1. That's because if you can't find
+ // id_before, it's as if there were no BEFORE phrase.
+ alpha = found + id_before.length;
+ }
+
+ char_it_c omega_found = funky_find_wide_backward(id_before.the_vectorxxxx.begin(),
+ id_before.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound )
+ {
+ // We found id_before within id_1, so change alpha to the character just
+ // to the right of BEFORE. Otherwise, we will leave alpha alone, so that
+ // it stays at the beginning of id_1
+ alpha_it = omega_found + id_before.length;
+ }
+ }
+
+ if( id_after.length )
+ {
+ // This is the AFTER delimiter. We look for the first occurrence in id_1
+
+ const char *start = id_after.the_characters.c_str();
+ const char *end = start + id_after.length;
+ const char *found = funky_find_backward(start, end, alpha, omega);
+ if( found )
+ {
+ // We found id_after in id_1. We update omega to be
+ // at that location.
+ omega = found;
+ }
+ else
+ {
+ // If the AFTER isn't found, we need to adjust things so that nothing
+ // happens.
+ omega = alpha;
+ }
+
+ char_it_c omega_found = funky_find_wide_backward(id_after.the_vectorxxxx.begin(),
+ id_after.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound)
+ {
+ // We found id_after in id_1. We update omega to be
+ // at that location.
+ omega_it = omega_found;
+ }
+ else
+ {
+ // If the AFTER isn't found, we need to adjust things so that nothing
+ // happens.
+ omega_it = alpha_it;
+ }
+ }
+ }
+
+static
+void
+inspect_backward_format_1(const size_t integers[])
+ {
+ size_t int_index = 0;
+ size_t cblc_index = 0;
+
+ // Reference the language specification for the meanings of identifier_X
+
+ // Pick up the number of identifier_2 loops in this INSPECT statement
+ size_t n_identifier_2 = integers[int_index++];
+
+ std::vector<id_2_result> id_2_results(n_identifier_2);
+
+ // Pick up identifier_1, which is the string being inspected
+ const cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
+ size_t id1_o = __gg__treeplet_1o[cblc_index];
+ size_t id1_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+ // normalize it, according to the language specification.
+ normalized_operand normalized_id_1 = normalize_id(id1, id1_o, id1_s, id1->encoding);
+
+ std::vector<comparand> comparands;
+
+ for(size_t i=0; i<n_identifier_2; i++)
+ {
+ // For each identifier_2, we pick up its value:
+
+ id_2_results[i].id2 = __gg__treeplet_1f [cblc_index];
+ id_2_results[i].id2_o = __gg__treeplet_1o[cblc_index];
+ id_2_results[i].id2_s = __gg__treeplet_1s[cblc_index];
+
+ cblc_index += 1;
+ id_2_results[i].result = 0;
+
+ // For each identifier 2, there is a count of operations:
+ size_t nbounds = integers[int_index++];
+
+ for(size_t j=0; j<nbounds; j++ )
+ {
+ // each operation has a bound code:
+ cbl_inspect_bound_t operation
+ = (cbl_inspect_bound_t)integers[int_index++];
+ switch( operation )
+ {
+ case bound_characters_e:
+ {
+ // We are counting characters. There is no identifier-3,
+ // but we we hard-code the length to one to represent a
+ // single character.
+ comparand next_comparand = {};
+ next_comparand.id_2_index = i;
+ next_comparand.operation = operation;
+ next_comparand.identifier_3.length = 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f [cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ next_comparand.alpha
+ = normalized_id_1.the_characters.c_str();
+
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+ comparands.push_back(next_comparand);
+ break;
+ }
+ default:
+ {
+ // We have some number of identifer-3 values,
+ // each with possible PHRASE1 modifiers.
+ size_t pair_count = integers[int_index++];
+
+ // We need to build up pair_count comparand structures:
+
+ for(size_t k=0; k<pair_count; k++)
+ {
+ comparand next_comparand = {};
+ next_comparand.id_2_index = i;
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
+ size_t id3_o = __gg__treeplet_1o[cblc_index];
+ size_t id3_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_3
+ = normalize_id(id3, id3_o, id3_s, id1->encoding);
+
+ next_comparand.alpha
+ = normalized_id_1.the_characters.c_str();
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+ next_comparand.leading = true;
+ next_comparand.leading_count = 0;
+ comparands.push_back(next_comparand);
+ }
+ }
+ }
+ }
+ }
+
+ // We are now ready to walk through identifier-1, character by
+ // character, checking each of the comparands for a match:
+
+ // We are now set up to accomplish the data flow described
+ // in the language specification. We loop through the
+ // the character positions in normalized_id_1:
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
+ char_it_c the_end_of_the_world = rightmost;
+
+ while( leftmost < rightmost )
+ {
+ size_t rightmost_delta = 0;
+ rightmost -= 1;
+ // We look at the rightmost position. If that position is within the
+ // alpha-to-omega qualified range, we check all possible matches:
+
+ for(size_t k=0; k<comparands.size(); k++)
+ {
+ if( rightmost < comparands[k].alpha_it )
+ {
+ // This can't be a match, because rightmost is
+ // to the left of the comparand's alpha.
+ continue;
+ }
+ if( rightmost + comparands[k].identifier_3.length >
+ comparands[k].omega_it )
+ {
+ // This can't be a match, because the rightmost
+ // character of the comparand falls to the right
+ // of the comparand's omega
+ continue;
+ }
+ if( rightmost + comparands[k].identifier_3.length >
+ the_end_of_the_world )
+ {
+ // This can't be a match, because the rightmost character of the
+ // comparand falls past the new edge of id_1 established by a prior
+ // match.
+ continue;
+ }
+ // A match is theoretically possible, because all
+ // the characters of the comparand fall between
+ // alpha and omega:
+ bool possible_match = true;
+
+ if( comparands[k].operation != bound_characters_e )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
+ {
+ possible_match = false;
+ break;
+ }
+ }
+ }
+ if( possible_match )
+ {
+ // The characters of the comparand match the
+ // characters at rightmost.
+ bool match = false;
+ switch( comparands[k].operation )
+ {
+ case bound_first_e:
+ // This can't happen in a FORMAT_1
+ warnx("The compiler goofed: "
+ "INSPECT FORMAT 1 "
+ "shouldn't have "
+ "bound_first_e");
+ abort();
+ break;
+
+ case bound_characters_e:
+ match = 1;
+ break;
+
+ case bound_all_e:
+ {
+ // We have a match.
+ match = true;
+ break;
+ }
+
+ case bound_leading_e:
+ {
+ // We have a match at rightmost. But we need to figure out if this
+ // particular match is valid for LEADING.
+
+ if( comparands[k].leading )
+ {
+ if( rightmost + comparands[k].identifier_3.length
+ == comparands[k].omega_it)
+ {
+ // This means that the match here is just the latest of a
+ // string of LEADING matches that started at .omega
+ comparands[k].leading_count += 1;
+ match = true;
+ comparands[k].omega_it -= comparands[k].identifier_3.length;
+ the_end_of_the_world = rightmost;
+ rightmost_delta = comparands[k].identifier_3.length-1;
+ }
+ }
+ break;
+ }
+
+ case bound_trailing_e:
+ {
+ // We have a match at rightmost.
+ //
+ // We want to know if this is a trailing match. For that to be,
+ // all of the possible matches from here leftward to the alpha have
+ // to be true as well:
+
+ if( (rightmost - comparands[k].alpha_it )
+ % comparands[k].identifier_3.length == 0 )
+ {
+ // The remaining number of characters is correct for a match.
+ // Keep checking.
+
+ // Assume a match until we learn otherwise:
+ match = true;
+ char_it_c local_left = rightmost;
+ local_left -= comparands[k].identifier_3.length;
+ while( local_left >= comparands[k].alpha_it )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
+ != local_left[m] )
+ {
+ // We have a mismatched character, so no trailing match is
+ // possible
+ match = false;
+ break;
+ }
+ }
+ local_left -= comparands[k].identifier_3.length;
+ }
+ }
+ break;
+ }
+ }
+
+ if( match )
+ {
+ // We have a match at rightmost:
+ // Bump the result counter
+ id_2_results[comparands[k].id_2_index].result += 1;
+
+ // We have a match here at rightmost, so we need to set the end of
+ // the world here
+ the_end_of_the_world = rightmost;
+
+ // Adjust rightmost by the additional characters in a BACKWARD
+ // LEADING search:
+ rightmost -= rightmost_delta;
+ break;
+ }
+ }
+ else
+ {
+ // We are within alpha/omega, but there was no
+ // match, which permanently disqualifies the
+ // possibility of LEADING
+ comparands[k].leading = false;
+ }
+ }
+ }
+
+ // Add our results to the identifier_2 values:
+
+ for(size_t i = 0; i<id_2_results.size(); i++)
+ {
+ int rdigits;
+ __int128 id_2_value
+ = __gg__binary_value_from_qualified_field(&rdigits,
+ id_2_results[i].id2,
+ id_2_results[i].id2_o,
+ id_2_results[i].id2_s);
+ while(rdigits--)
+ {
+ id_2_value /= 10.0;
+ }
+
+ // Accumulate what we've found into it
+ id_2_value += id_2_results[i].result;
+
+ // And put it back:
+ __gg__int128_to_qualified_field(id_2_results[i].id2,
+ id_2_results[i].id2_o,
+ id_2_results[i].id2_s,
+ id_2_value,
+ 0,
+ truncation_e,
+ NULL);
+ }
+ }
+
+extern "C"
+void
+__gg__inspect_format_1(int backward, size_t integers[])
+ {
+ if( backward )
+ {
+ return inspect_backward_format_1(integers);
+ }
+
+ size_t int_index = 0;
+ size_t cblc_index = 0;
+
+ // Reference the language specification for the meanings of identifier_X
+
+ // Pick up the number of identifier_2 loops in this INSPECT statement
+ size_t n_identifier_2 = integers[int_index++];
+
+ std::vector<id_2_result> id_2_results(n_identifier_2);
+
+ // Pick up identifier_1, which is the string being inspected
+ const cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
+ size_t id1_o = __gg__treeplet_1o[cblc_index];
+ size_t id1_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+ // normalize it, according to the language specification.
+ normalized_operand normalized_id_1
+ = normalize_id(id1, id1_o, id1_s, id1->encoding);
+
+ std::vector<comparand> comparands;
+
+ for(size_t i=0; i<n_identifier_2; i++)
+ {
+ // For each identifier_2, we pick up its value:
+
+ id_2_results[i].id2 = __gg__treeplet_1f [cblc_index];
+ id_2_results[i].id2_o = __gg__treeplet_1o[cblc_index];
+ id_2_results[i].id2_s = __gg__treeplet_1s[cblc_index];
+
+ cblc_index += 1;
+ id_2_results[i].result = 0;
+
+ // For each identifier 2, there is a count of operations:
+ size_t nbounds = integers[int_index++];
+
+ for(size_t j=0; j<nbounds; j++ )
+ {
+ // each operation has a bound code:
+ cbl_inspect_bound_t operation
+ = (cbl_inspect_bound_t)integers[int_index++];
+ switch( operation )
+ {
+ case bound_characters_e:
+ {
+ // We are counting characters. There is no identifier-3,
+ // but we we hard-code the length to one to represent a
+ // single character.
+ comparand next_comparand = {};
+ next_comparand.id_2_index = i;
+ next_comparand.operation = operation;
+ next_comparand.identifier_3.length = 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f [cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ next_comparand.alpha
+ = normalized_id_1.the_characters.c_str();
+
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+ comparands.push_back(next_comparand);
+ break;
+ }
+ default:
+ {
+ // We have some number of identifer-3 values,
+ // each with possible PHRASE1 modifiers.
+ size_t pair_count = integers[int_index++];
+
+ // We need to build up pair_count comparand structures:
+
+ for(size_t k=0; k<pair_count; k++)
+ {
+ comparand next_comparand = {};
+ next_comparand.id_2_index = i;
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
+ size_t id3_o = __gg__treeplet_1o[cblc_index];
+ size_t id3_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_3
+ = normalize_id(id3,
+ id3_o,
+ id3_s,
+ id1->encoding);
+
+ next_comparand.alpha
+ = normalized_id_1.the_characters.c_str();
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ the_alpha_and_omega(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+ next_comparand.leading = true;
+ next_comparand.leading_count = 0;
+ comparands.push_back(next_comparand);
+ }
+ }
+ }
+ }
+ }
+
+ // We are now ready to walk through identifier-1, character by
+ // character, checking each of the comparands for a match:
+
+ // We are now set up to accomplish the data flow described
+ // in the language specification. We loop through the
+ // the character positions in normalized_id_1:
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
+
+ while( leftmost < rightmost )
+ {
+ // For each leftmost position, we check each of the
+ // pairs:
+
+ for(size_t k=0; k<comparands.size(); k++)
+ {
+ if( leftmost < comparands[k].alpha_it )
+ {
+ // This can't be a match, because leftmost is
+ // to the left of the comparand's alpha.
+ continue;
+ }
+ if( leftmost + comparands[k].identifier_3.length > comparands[k].omega_it )
+ {
+ // This can't be a match, because the rightmost
+ // character of the comparand falls to the right
+ // of the comparand's omega
+ continue;
+ }
+ // A match is theoretically possible, because all
+ // the characters of the comparand fall between
+ // alpha and omega:
+ bool possible_match = true;
+
+ if( comparands[k].operation != bound_characters_e )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m] != leftmost[m] )
+ {
+ possible_match = false;
+ break;
+ }
+ }
+ }
+ if( possible_match )
+ {
+ // The characters of the comparand match the
+ // characters at leftmost.
+ bool match = false;
+ switch( comparands[k].operation )
+ {
+ case bound_first_e:
+ // This can't happen in a FORMAT_1
+ warnx("The compiler goofed: "
+ "INSPECT FORMAT 1 "
+ "shouldn't have "
+ "bound_first_e");
+ abort();
+ break;
+
+ case bound_characters_e:
+ match = true;
+ break;
+
+ case bound_all_e:
+ {
+ // We have a match.
+ match = true;
+ break;
+ }
+
+ case bound_leading_e:
+ {
+ // We have a match at leftmost. But we need to figure out if this
+ // particular match is valid for LEADING.
+
+ // Hang onto your hat. This is delightfully clever.
+ //
+ // This position is LEADING if:
+ // 1) .leading is still true
+ // 2) leftmost / (length_of_comparand ) = current_count
+ //
+ // I get chills every time I look at that.
+
+ if( comparands[k].leading )
+ {
+ // So far, so good.
+ size_t count = ((leftmost - comparands[k].alpha_it))
+ / comparands[k].identifier_3.length;
+ if( count == comparands[k].leading_count )
+ {
+ // This means that the match here is just the latest of a
+ // string of LEADING matches that started at .alpha
+ comparands[k].leading_count += 1;
+ match = true;
+ }
+ }
+ break;
+ }
+
+ case bound_trailing_e:
+ {
+ // We have a match at leftmost.
+ //
+ // We want to know if this is a trailing match. For that to be,
+ // all of the possible matches from here to the omega have to be
+ // true as well:
+
+ if( (comparands[k].omega_it-leftmost)
+ % comparands[k].identifier_3.length == 0 )
+ {
+ // The remaining number of characters is correct for a match.
+ // Keep checking.
+
+ // Assume a match until we learn otherwise:
+ match = true;
+ char_it_c local_left = leftmost;
+ local_left += comparands[k].identifier_3.length;
+ while( match && local_left < comparands[k].omega_it )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
+ != local_left[m] )
+ {
+ // We have a mismatched character, so no trailing match is
+ // possible
+ match = false;
+ break;
+ }
+ }
+ local_left += comparands[k].identifier_3.length;
+ }
+ }
+ break;
+ }
+ }
+
+ if( match )
+ {
+ // We have a match at leftmost:
+
+ // Bump the result counter
+ id_2_results[comparands[k].id_2_index].result += 1;
+
+ // Adjust the leftmost pointer to point to
+ // the rightmost character of the matched
+ // string, keeping in mind that it will be
+ // bumped again after we break out of the
+ // k<pair_count loop:
+ leftmost += comparands[k].identifier_3.length - 1;
+ break;
+ }
+ }
+ else
+ {
+ // We are within alpha/omega, but there was no
+ // match, which permanently disqualifies the
+ // possibility of LEADING
+ comparands[k].leading = false;
+ }
+ }
+ leftmost += 1;
+ }
+
+ // Add our results to the identifier_2 values:
+
+ for(size_t i = 0; i<id_2_results.size(); i++)
+ {
+ int rdigits;
+ __int128 id_2_value
+ = __gg__binary_value_from_qualified_field(&rdigits,
+ id_2_results[i].id2,
+ id_2_results[i].id2_o,
+ id_2_results[i].id2_s);
+ while(rdigits--)
+ {
+ id_2_value /= 10.0;
+ }
+
+ // Accumulate what we've found into it
+ id_2_value += id_2_results[i].result;
+
+ // And put it back:
+ __gg__int128_to_qualified_field(id_2_results[i].id2,
+ id_2_results[i].id2_o,
+ id_2_results[i].id2_s,
+ id_2_value,
+ 0,
+ truncation_e,
+ NULL);
+ }
+ }
+
+static
+void
+inspect_backward_format_2(const size_t integers[])
+ {
+ size_t int_index = 0;
+ size_t cblc_index = 0;
+
+ // Reference the language specification for the meanings of identifier_X
+
+ // Pick up identifier_1, which is the string being inspected
+ cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
+ size_t id1_o = __gg__treeplet_1o[cblc_index];
+ size_t id1_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ // normalize it, according to the language specification.
+ normalized_operand normalized_id_1
+ = normalize_id(id1, id1_o, id1_s, id1->encoding);
+
+ std::vector<comparand> comparands;
+
+ // Pick up the count of operations:
+ size_t nbounds = integers[int_index++];
+
+ for(size_t j=0; j<nbounds; j++ )
+ {
+ // each operation has a bound code:
+ cbl_inspect_bound_t operation = (cbl_inspect_bound_t)integers[int_index++];
+ switch( operation )
+ {
+ case bound_characters_e:
+ {
+ comparand next_comparand = {};
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
+ size_t id5_o = __gg__treeplet_1o[cblc_index];
+ size_t id5_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_5
+ = normalize_id(id5, id5_o, id5_s, id1->encoding);
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ // Because this is a CHARACTER operation, the lengths of
+ // identifier-3 and identifier-5 should be one. Let's avoid the
+ // chaos that will otherwise ensue should the lengths *not* be
+ // one.
+ next_comparand.identifier_3.length = 1;
+ next_comparand.identifier_5.length = 1;
+
+ next_comparand.alpha = normalized_id_1.the_characters.c_str();
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+
+ comparands.push_back(next_comparand);
+ break;
+ }
+ default:
+ {
+ // We have some number of identifer-3/identifier-5 pairs,
+ // each with possible PHRASE1 modifiers.
+ size_t pair_count = integers[int_index++];
+
+ for(size_t k=0; k<pair_count; k++)
+ {
+ comparand next_comparand = {};
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
+ size_t id3_o = __gg__treeplet_1o[cblc_index];
+ size_t id3_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
+ size_t id5_o = __gg__treeplet_1o[cblc_index];
+ size_t id5_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_3 = normalize_id(id3, id3_o, id3_s, id1->encoding);
+ next_comparand.identifier_5 = normalize_id(id5, id5_o, id5_s, id1->encoding);
+
+ // Identifiers 3 and 5 have to be the same length. But
+ // but either, or both, can be figurative constants. If
+ // they are figurative constants, they start off with a
+ // length of one. We will expand figurative constants to
+ // match the length of the other one:
+
+ if( id3->attr & FIGCONST_MASK )
+ {
+ match_lengths( next_comparand.identifier_3,
+ next_comparand.identifier_5);
+ }
+ else if( id5->attr & FIGCONST_MASK )
+ {
+ match_lengths( next_comparand.identifier_5,
+ next_comparand.identifier_3);
+ }
+
+ next_comparand.alpha
+ = normalized_id_1.the_characters.c_str();
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+ next_comparand.leading = true;
+ next_comparand.leading_count = 0;
+ next_comparand.first = true;
+ comparands.push_back(next_comparand);
+ }
+ }
+ }
+ }
+
+ // We can now look through normalized_id_1 and replace characters:
+
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
+ char_it_c the_end_of_the_world = rightmost;
+
+ while( leftmost < rightmost )
+ {
+ size_t rightmost_delta = 0;
+
+ rightmost -= 1;
+ // We look at the rightmost position. If that position is within the
+ // alpha-to-omega qualified range, we check all possible matches:
+
+ for(size_t k=0; k<comparands.size(); k++)
+ {
+ if( rightmost < comparands[k].alpha_it )
+ {
+ // This can't be a match, because rightmost is
+ // to the left of the comparand's alpha.
+ continue;
+ }
+ if( rightmost + comparands[k].identifier_3.length > comparands[k].omega_it )
+ {
+ // This can't be a match, because the rightmost
+ // character of the comparand falls to the right
+ // of the comparand's omega
+ continue;
+ }
+ if( rightmost + comparands[k].identifier_3.length > the_end_of_the_world )
+ {
+ // This can't be a match, because the rightmost character of the
+ // comparand falls past the new edge of id_1 established by a prior
+ // match.
+ continue;
+ }
+ // A match is theoretically possible, because all
+ // the characters of the comparand fall between
+ // alpha and omega:
+ bool possible_match = true;
+
+ if( comparands[k].operation != bound_characters_e )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
+ {
+ possible_match = false;
+ break;
+ }
+ }
+ }
+ if( possible_match )
+ {
+ // The characters of the comparand match the
+ // characters at rightmost.
+ bool match = false;
+ switch( comparands[k].operation )
+ {
+ case bound_first_e:
+ // This can't happen in a FORMAT_2
+ warnx("The compiler goofed: "
+ "INSPECT FORMAT 2 "
+ "shouldn't have "
+ "bound_first_e");
+ abort();
+ break;
+
+ case bound_characters_e:
+ match = 1;
+ break;
+
+ case bound_all_e:
+ {
+ // We have a match.
+ match = true;
+ break;
+ }
+
+ case bound_leading_e:
+ {
+ // We have a match at rightmost. But we need to figure out if this
+ // particular match is valid for LEADING.
+
+ if( comparands[k].leading )
+ {
+ if( rightmost
+ + comparands[k].identifier_3.length * (comparands[k].leading_count +1)
+ == comparands[k].omega_it)
+ {
+ // This means that the match here is just the latest of a
+ // string of LEADING matches that started at .omega
+ comparands[k].leading_count += 1;
+ match = true;
+ rightmost_delta = comparands[k].identifier_3.length-1;
+ }
+ }
+ break;
+ }
+
+ case bound_trailing_e:
+ {
+ // We have a match at rightmost.
+ //
+ // We want to know if this is a trailing match. For that to be,
+ // all of the possible matches from here leftward to the alpha have
+ // to be true as well:
+
+ if( (rightmost - comparands[k].alpha_it )
+ % comparands[k].identifier_3.length == 0 )
+ {
+ // The remaining number of characters is correct for a match.
+ // Keep checking.
+
+ // Assume a match until we learn otherwise:
+ match = true;
+ char_it_c local_left = rightmost;
+ local_left -= comparands[k].identifier_3.length;
+ while( local_left >= comparands[k].alpha_it )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
+ != local_left[m] )
+ {
+ // We have a mismatched character, so no trailing match is
+ // possible
+ match = false;
+ break;
+ }
+ }
+ local_left -= comparands[k].identifier_3.length;
+ }
+ }
+ break;
+ }
+ }
+
+ if( match )
+ {
+ // We have a match at rightmost. We need to
+ // to replace the characters in normalized_id_1
+ // with the characters from normalized_id_5
+ //fprintf(stderr, "Rule: %ld %p %s\n", k+1, rightmost, rightmost);
+
+ size_t index = rightmost - normalized_id_1.the_vectorxxxx.begin();
+ for( size_t l = 0;
+ l < comparands[k].identifier_5.length;
+ l++ )
+ {
+ cbl_char_t ch = comparands[k].identifier_5.
+ the_vectorxxxx[l];
+ normalized_id_1.the_vectorxxxx[index++] = ch;
+ }
+
+ the_end_of_the_world = rightmost;
+ rightmost -= rightmost_delta;
+ break;
+ }
+ }
+ else
+ {
+ comparands[k].leading = false;
+ }
+ }
+ }
+
+ // Here is where we take the characters from normalized_id_1 and put them
+ // back into identifier_1.
+
+ charmap_t *charmap = __gg__get_charmap(id1->encoding);
+ // Wastefully prefill id_1 with spaces in case the processing resulted in a
+ // string shorter than the original. (There is always the possiblity that
+ // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
+ // this before calling __gg__converter, because both mapped_character and
+ // __gg__iconverter use the same static buffer.
+ unsigned char *id1_data = id1->data + id1_o;
+ charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
+
+ // We've been working in UTF32; we convert back to the original id1 encoding.
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
+ id1->encoding,
+ normalized_id_1.the_vectorxxxx.data(),
+ normalized_id_1.length*width_of_utf32,
+ &bytes_converted) ;
+ // And move those characters into place in id_1:
+ memcpy(id1_data,
+ converted,
+ std::min(bytes_converted, id1_s));
+
+ return;
+ }
+
+extern "C"
+void
+__gg__inspect_format_2(int backward, size_t integers[])
+ {
+ if( backward )
+ {
+ return inspect_backward_format_2(integers);
+ }
+ size_t int_index = 0;
+ size_t cblc_index = 0;
+
+ // Reference the language specification for the meanings of identifier_X
+
+ // Pick up identifier_1, which is the string being inspected
+ cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
+ size_t id1_o = __gg__treeplet_1o[cblc_index];
+ size_t id1_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ // normalize it, according to the language specification.
+ normalized_operand normalized_id_1
+ = normalize_id(id1, id1_o, id1_s, id1->encoding);
+
+ std::vector<comparand> comparands;
+
+ // Pick up the count of operations:
+ size_t nbounds = integers[int_index++];
+
+ for(size_t j=0; j<nbounds; j++ )
+ {
+ // each operation has a bound code:
+ cbl_inspect_bound_t operation
+ = (cbl_inspect_bound_t)integers[int_index++];
+ switch( operation )
+ {
+ case bound_characters_e:
+ {
+ comparand next_comparand = {} ;
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
+ size_t id5_o = __gg__treeplet_1o[cblc_index];
+ size_t id5_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_5
+ = normalize_id(id5, id5_o, id5_s, id1->encoding);
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ // Because this is a CHARACTER operation, the lengths of
+ // identifier-3 and identifier-5 should be one. Let's avoid the
+ // chaos that will otherwise ensue should the lengths *not* be
+ // one.
+ next_comparand.identifier_3.length = 1;
+ next_comparand.identifier_5.length = 1;
+
+ next_comparand.alpha = normalized_id_1.the_characters.c_str();
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+ comparands.push_back(next_comparand);
+ break;
+ }
+ default:
+ {
+ // We have some number of identifer-3/identifier-5 pairs,
+ // each with possible PHRASE1 modifiers.
+ size_t pair_count = integers[int_index++];
+
+ for(size_t k=0; k<pair_count; k++)
+ {
+ comparand next_comparand = {};
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
+ size_t id3_o = __gg__treeplet_1o[cblc_index];
+ size_t id3_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
+ size_t id5_o = __gg__treeplet_1o[cblc_index];
+ size_t id5_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_3 = normalize_id(id3,
+ id3_o,
+ id3_s,
+ id1->encoding);
+ next_comparand.identifier_5 = normalize_id(id5,
+ id5_o,
+ id5_s,
+ id1->encoding);
+
+ // Identifiers 3 and 5 have to be the same length. But
+ // but either, or both, can be figurative constants. If
+ // they are figurative constants, they start off with a
+ // length of one. We will expand figurative constants to
+ // match the length of the other one:
+
+ if( id3->attr & FIGCONST_MASK )
+ {
+ match_lengths( next_comparand.identifier_3,
+ next_comparand.identifier_5);
+ }
+ else if( id5->attr & FIGCONST_MASK )
+ {
+ match_lengths( next_comparand.identifier_5,
+ next_comparand.identifier_3);
+ }
+
+ next_comparand.alpha
+ = normalized_id_1.the_characters.c_str();
+ next_comparand.omega
+ = next_comparand.alpha + normalized_id_1.length;
+
+ normalized_operand normalized_id_4_before
+ = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
+ normalized_operand normalized_id_4_after
+ = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+ next_comparand.leading = true;
+ next_comparand.leading_count = 0;
+ next_comparand.first = true;
+ comparands.push_back(next_comparand);
+ }
+ }
+ }
+ }
+
+ // We are now set up to accomplish the data flow described
+ // in the language specification. We loop through the
+ // the character positions in normalized_id_1:
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
+
+ while( leftmost < rightmost )
+ {
+ // For each leftmost position, we check each of the
+ // comparands
+
+ for(size_t k=0; k<comparands.size(); k++)
+ {
+ if( leftmost < comparands[k].alpha_it )
+ {
+ // This can't be a match, because leftmost is
+ // to the left of the comparand's alpha.
+ continue;
+ }
+ if( leftmost + comparands[k].identifier_3.length
+ > comparands[k].omega_it )
+ {
+ // This can't be a match, because the rightmost
+ // character of the comparand falls to the right
+ // of the comparand's omega
+ continue;
+ }
+ // A match is theoretically possible, because all
+ // the characters of the comparand fall between
+ // alpha and omega:
+ bool possible_match = true;
+ if( comparands[k].operation != bound_characters_e)
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
+ != leftmost[m] )
+ {
+ possible_match = false;
+ break;
+ }
+ }
+ }
+ if( possible_match )
+ {
+ // The characters of the comparand match the
+ // characters at leftmost. See if further processing is
+ // indicated:
+
+ bool match = false;
+ switch( comparands[k].operation )
+ {
+ case bound_characters_e:
+ match = true;
+ break;
+
+ case bound_first_e:
+ if( comparands[k].first )
+ {
+ match = true;
+ comparands[k].first = false;
+ }
+ break;
+
+ case bound_all_e:
+ {
+ // We have a match.
+ match = true;
+ break;
+ }
+
+ case bound_leading_e:
+ {
+ // We have a match at leftmost. But we need to figure out if this
+ // particular match is valid for LEADING.
+
+ // Hang onto your hat. This is delightfully clever.
+ //
+ // This position is LEADING if:
+ // 1) .leading is still true
+ // 2) leftmost / (length_of_comparand ) = current_count
+ //
+ // I get chills every time I look at that.
+ if( comparands[k].leading )
+ {
+ // So far, so good.
+ size_t count = (leftmost - comparands[k].alpha_it)
+ / comparands[k].identifier_3.length;
+ if( count == comparands[k].leading_count )
+ {
+ // This means that the match here is just the latest of a
+ // string of LEADING matches that started at .alpha
+ comparands[k].leading_count += 1;
+ match = true;
+ }
+ }
+ break;
+ }
+
+ case bound_trailing_e:
+ {
+ // We have a match at leftmost.
+ //
+ // We want to know if this is a trailing match. For that to be,
+ // all of the possible matches from here to the omega have to be
+ // true as well:
+
+ if( (comparands[k].omega_it-leftmost)
+ % comparands[k].identifier_3.length == 0 )
+ {
+ // The remaining number of characters is correct for a match.
+ // Keep checking.
+
+ // Assume a match until we learn otherwise:
+ match = true;
+ char_it_c local_left = leftmost;
+ local_left += comparands[k].identifier_3.length;
+ while( local_left < comparands[k].omega_it )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length; m++)
+ {
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
+ != local_left[m] )
+ {
+ // We have a mismatched character, so no trailing match is
+ // possible
+ match = false;
+ break;
+ }
+ }
+ local_left += comparands[k].identifier_3.length;
+ }
+ }
+ break;
+ }
+ }
+ if( match )
+ {
+ // We have a match at leftmost. We need to
+ // to replace the characters in normalized_id_1
+ // with the characters from normalized_id_5
+
+ size_t index = leftmost
+ - normalized_id_1.the_vectorxxxx.begin();
+ for( size_t l = 0;
+ l < comparands[k].identifier_5.length;
+ l++ )
+ {
+ char ch = comparands[k].identifier_5.
+ the_vectorxxxx[l];
+ normalized_id_1.the_vectorxxxx[index++] = ch;
+ }
+ // Adjust the leftmost pointer to point to
+ // the rightmost character of the matched
+ // string, keeping in mind that it will be
+ // bumped again after we break out of the
+ // k<pair_count loop:
+ leftmost += comparands[k].identifier_3.length - 1;
+ break;
+ }
+ }
+ else
+ {
+ comparands[k].leading = false;
+ }
+ }
+ leftmost += 1;
+ }
+
+ // Here is where we take the characters from normalized_id_1 and put them
+ // back into identifier_1.
+
+ charmap_t *charmap = __gg__get_charmap(id1->encoding);
+ // Wastefully prefill id_1 with spaces in case the processing resulted in a
+ // string shorter than the original. (There is always the possiblity that
+ // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
+ // this before calling __gg__converter, because both mapped_character and
+ // __gg__iconverter use the same static buffer.
+ unsigned char *id1_data = id1->data + id1_o;
+ charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
+
+ // We've been working in UTF32; we convert back to the original id1 encoding.
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
+ id1->encoding,
+ normalized_id_1.the_vectorxxxx.data(),
+ normalized_id_1.length*width_of_utf32,
+ &bytes_converted) ;
+ // And move those characters into place in id_1:
+ memcpy(id1_data,
+ converted,
+ std::min(bytes_converted, id1_s));
+ return;
+ }
+
+static std::u32string
+normalize_for_inspect_format_4(const cblc_field_t *var,
+ size_t var_offset,
+ size_t var_size,
+ cbl_encoding_t source_encoding)
+ {
+ std::u32string retval;
+ if(var)
+ {
+ const charmap_t *charmap_var = __gg__get_charmap(source_encoding);
+ charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+
+ cbl_figconst_t figconst =
+ static_cast<cbl_figconst_t>(var->attr & FIGCONST_MASK);
+ // We have a corner case to deal with:
+ if( strcmp(var->name, "NULLS") == 0 )
+ {
+ figconst = null_value_e;
+ }
+
+ if( figconst )
+ {
+ // Build up an var_size array of figconst characters
+ cbl_char_t figchar = '\0';
+ switch( figconst )
+ {
+ case low_value_e :
+ figchar = charmap32->low_value_character();
+ break;
+ case zero_value_e :
+ figchar = charmap32->mapped_character(ascii_0);
+ break;
+ case space_value_e :
+ figchar = charmap32->mapped_character(ascii_space);
+ break;
+ case quote_value_e :
+ figchar = charmap32->quote_character();
+ break;
+ case high_value_e :
+ {
+ if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
+ {
+ // See the comments where these constants are defined.
+ if(charmap_var->stride() == 1)
+ {
+ if(charmap_var->is_like_ebcdic())
+ {
+ // This maps back to 0xFF in CP1140
+ figchar = EBCDIC_HIGH_VALUE_32;
+ }
+ else
+ {
+ // This maps back to 0xFF in CP1252
+ figchar = ASCII_HIGH_VALUE_32;
+ }
+ }
+ else if(charmap_var->stride() == 2)
+ {
+ figchar = UTF16_HIGH_VALUE_32;
+ }
+ else
+ {
+ figchar = UTF32_HIGH_VALUE_32;
+ }
+ }
+ else
+ {
+ figchar = charmap32->mapped_character(__gg__high_value_character);
+ }
+ break;
+ }
+ case null_value_e:
+ break;
+ default:
+ figchar = '\0';
+ abort();
+ break;
+ }
+ retval.push_back(figchar);
+ }
+ else
+ {
+ // It's not a figurative constant, so convert var to UTF32.
+ size_t converted_bytes;
+ const char *converted = __gg__iconverter(
+ var->encoding,
+ DEFAULT_32_ENCODING,
+ var->data + var_offset,
+ var_size,
+ &converted_bytes);
+ void *duped = __gg__memdup(converted, converted_bytes);
+ for(size_t i=0; i<converted_bytes; i+=width_of_utf32)
+ {
+ cbl_char_t ch = charmap32->getch(duped, i);
+ retval.push_back(ch);
+ }
+ free(duped);
+ }
+ }
+ return retval;
+ }
+
+extern "C"
+void
+__gg__inspect_format_4( int backward,
+ cblc_field_t *input, // identifier-1
+ size_t input_offset,
+ size_t input_size,
+ const cblc_field_t *original, // id-6 / literal-4
+ size_t original_offset,
+ size_t original_size,
+ const cblc_field_t *replacement, // id-7 / literal-5
+ size_t replacement_offset,
+ size_t replacement_size,
+ const cblc_field_t *after, // id-4 / literal-2
+ size_t after_offset,
+ size_t after_size,
+ const cblc_field_t *before, // id-4 / literal-2
+ size_t before_offset,
+ size_t before_size
+ )
+ {
+ // We need to cope with multiple encodings; the ISO specification says only
+ // that identifier-1 and -3 through -n are display or national.
+
+ // We will leave the input encoded as whatever it is, and we will convert the
+ // others to match.
+
+ // We also need to cope with anything except identifier-1 being a figurative
+ // constant.
+
+ cbl_figconst_t figconst_original =
+ static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
+ cbl_figconst_t figconst_replacement =
+ static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
+ int figswitch = (figconst_original ? 2 : 0) + (figconst_replacement ? 1 : 0);
+ switch( figswitch )
+ {
+ case 0:
+ // Neither are figconst; we leave the sizes alone
+ break;
+ case 1:
+ // Only replacement is figconst, so we make its size -1
+ // This will cause CONVERTING "ABC" TO ZERO to be the same as
+ // CONVERTING "ABC" TO "000"
+ replacement_size = (size_t)(-1LL);
+ break;
+ case 2:
+ // Only original is figconst. Set the size to one. (This is necessary
+ // because the size of NULL is eight, since NULL does double-duty as both
+ // a character (this is a MicroFocus specification) and a pointer.
+ original_size = 1;
+ break;
+ case 3:
+ // Both are figconst
+ replacement_size = original_size = 1;
+ break;
+ }
+
+ // Because before and after can be figurative constant NULL, we have to make
+ // sure that in such cases the size is 1:
+ if(before && before_size && before->attr & FIGCONST_MASK)
+ {
+ before_size = 1;
+ }
+ if(after && after_size && after->attr & FIGCONST_MASK)
+ {
+ after_size = 1;
+ }
+
+ bool all = (replacement_size == (size_t)(-1LL));
+ if( all )
+ {
+ // A replacement_size of -1 means that the statement is something like
+ // INSPECT XYZ CONVERTING "abcxyz" to ALL "?" That means replacement is
+ // a single character. We need to convert it to the target encoding.
+ const charmap_t * charmap = __gg__get_charmap(input->encoding);
+ replacement_size = charmap->stride();
+ }
+
+ std::u32string str_input = normalize_for_inspect_format_4(input , input_offset , input_size , input->encoding);
+ std::u32string str_original = normalize_for_inspect_format_4(original , original_offset , original_size , input->encoding);
+ std::u32string str_replacement = normalize_for_inspect_format_4(replacement, replacement_offset, replacement_size, input->encoding);
+ std::u32string str_after = normalize_for_inspect_format_4(after , after_offset , after_size , input->encoding);
+ std::u32string str_before = normalize_for_inspect_format_4(before , before_offset , before_size , input->encoding);
+
+ if( all )
+ {
+ // We now expand the single-character replacement to be the same length as
+ // original.
+ cbl_char_t ch = str_replacement[0];
+ str_replacement.clear();
+ for(size_t i=0; i<str_original.size(); i++)
+ {
+ str_replacement.push_back(ch);
+ }
+ }
+
+ // Use a map to make this O(N), rather than an O(N-squared),
+ // computational complexity
+ std::unordered_map<cbl_char_t, cbl_char_t>map;
+ typedef std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator map_it_t ;
+
+ // The rule is, if the same character appears more than once in the
+ // original (which is identifier-6), then the first occurrence of the
+ // matching character in replacement is used. So, we create the map
+ // backwards. The one closest to zero will win.
+ for(size_t i=str_original.size()-1; i<str_original.size(); i--)
+ {
+ map[str_original[i]] = str_replacement[i];
+ }
+
+ size_t leftmost_i; // Leftmost index to replace at.
+ size_t rightmost_i; // Rightmost+1 index to replace at.
+
+ if( !backward )
+ {
+ // This is a forward conversion. We look for the first instance
+ // of str_after from the left. And then we look for the first instance
+ // of str_before after that. When there is no str_before, we move the
+ // rightmost limit to the end of str_input, as if there were no BEFORE
+ // phrase:
+
+ if( str_after.empty() )
+ {
+ // There is no AFTER phrase, so we start from the left.
+ leftmost_i = 0;
+ }
+ else
+ {
+ size_t nfound = str_input.find(str_after);
+ if( nfound != std::u32string::npos )
+ {
+ // Move the left limit to one character past the found element
+ leftmost_i = nfound + str_after.size();
+ }
+ else
+ {
+ // We didn't find the after phrase, so we move the left limit to the
+ // end of input, which means nothing will be replaced
+ leftmost_i = str_input.size();
+ }
+ }
+
+ // At this point, leftmost_i has been set to something. Look for the
+ // BEFORE phrase somewhere to the right of it:
+
+ if( str_before.empty() )
+ {
+ // There is no BEFORE phrase, so set rightmost to the end of the input
+ rightmost_i = str_input.size();
+ }
+ else
+ {
+ // Look for BEFORE to the right of leftmost_i:
+ size_t nfound = str_input.find(str_before, leftmost_i);
+ if( nfound != std::u32string::npos )
+ {
+ // We found the BEFORE phrase.
+ rightmost_i = nfound;
+ }
+ else
+ {
+ // We didn't find the BEFORE phrase; IOS says to treat this situation
+ // as if there were no BEFORE phrase
+ rightmost_i = str_input.size();
+ }
+ }
+ }
+ else
+ {
+ // We are doing a BACKWARD conversion. So, we look for the AFTER phrase
+ // and use that to establish the rightmost limit. And we look for the
+ // BEFORE to the left of AFTER phrase and use that to establish the
+ // leftmost limit
+
+ if( str_after.empty() )
+ {
+ // There is no AFTER phrase, so we set the rightmost limit to the end
+ // of the input:
+ rightmost_i = str_input.size();
+ }
+ else
+ {
+ // Start from the right and look for AFTER
+ size_t nfound = str_input.rfind(str_after, str_input.size());
+ if( nfound != std::u32string::npos )
+ {
+ // We found str_after, so its location becomes rightmost
+ rightmost_i = nfound;
+ }
+ else
+ {
+ // We didn't find str_after, so we move rightmost all the way to the
+ // left, so that nothing will ever be found.
+ rightmost_i = 0;
+ }
+ }
+ // rightmost_i has been established, so now look for BEFORE to the left
+ // of it
+ if( str_before.empty() )
+ {
+ // There is no str_before, so the left limit is all the way to the left
+ leftmost_i = 0;
+ }
+ else
+ {
+ size_t nfound = str_input.rfind(str_before, rightmost_i);
+ if( nfound != std::u32string::npos )
+ {
+ // We found BEFORE, so we put the left limit just to the right of
+ // where we found it:
+ leftmost_i = nfound + str_before.size();
+ }
+ else
+ {
+ // Not finding the BEFORE phrase is the same as the BEFORE phrase
+ // not having been specified:
+ leftmost_i = 0;
+ }
+ }
+ }
+ // leftmost_i and rightmost_i have been established. Do the conversion of
+ // characters inside those limits:
+ for(size_t i=leftmost_i; i<rightmost_i; i++)
+ {
+ cbl_char_t ch = str_input[i];
+ map_it_t cvt = map.find(ch);
+ if( cvt != map.end() )
+ {
+ str_input[i] = cvt->second;
+ }
+ }
+
+ // We now take the converted str_input, and put it back into id_1:
+
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
+ input->encoding,
+ str_input.data(),
+ str_input.size()*width_of_utf32,
+ &bytes_converted) ;
+
+ // And move those characters into place in input:
+ memcpy(input->data + input_offset,
+ converted,
+ std::min(bytes_converted, input_size));
+ }
+
+
+
+extern "C"
+void
+__gg__inspect_format_1_sbc(int backward, size_t integers[])
+ {
+ // When this routine is called, we know we are working in a single-byte-coded
+ // codeset like ASCII or EBCDIC.
+ if( backward )
+ {
+ return inspect_backward_format_1(integers);
+ }
+
+ size_t int_index = 0;
+ size_t cblc_index = 0;
+
+ // Reference the language specification for the meanings of identifier_X
+
+ // Pick up the number of identifier_2 loops in this INSPECT statement
+ size_t n_identifier_2 = integers[int_index++];
+
+ std::vector<id_2_result> id_2_results(n_identifier_2);
+
+ // Pick up identifier_1, which is the string being inspected
+ const cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
+ size_t id1_o = __gg__treeplet_1o[cblc_index];
+ size_t id1_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+ // normalize it, according to the language specification.
+ std::string normalized_id_1
+ = normalize_id_sbc(id1, id1_o, id1_s, id1->encoding);
+
+ std::vector<comparand_sbc> comparands;
+
+ for(size_t i=0; i<n_identifier_2; i++)
+ {
+ // For each identifier_2, we pick up its value:
+
+ id_2_results[i].id2 = __gg__treeplet_1f [cblc_index];
+ id_2_results[i].id2_o = __gg__treeplet_1o[cblc_index];
+ id_2_results[i].id2_s = __gg__treeplet_1s[cblc_index];
+
+ cblc_index += 1;
+ id_2_results[i].result = 0;
+
+ // For each identifier 2, there is a count of operations:
+ size_t nbounds = integers[int_index++];
+
+ for(size_t j=0; j<nbounds; j++ )
+ {
+ // each operation has a bound code:
+ cbl_inspect_bound_t operation
+ = (cbl_inspect_bound_t)integers[int_index++];
+ switch( operation )
+ {
+ case bound_characters_e:
+ {
+ // We are counting characters. There is no identifier-3,
+ // but we we hard-code it to " " to set the length to 1.
+ comparand_sbc next_comparand = {};
+ next_comparand.id_2_index = i;
+ next_comparand.operation = operation;
+ next_comparand.identifier_3 = " ";
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f [cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ std::string normalized_id_4_before
+ = normalize_id_sbc( id4_before,
+ id4_before_o,
+ id4_before_s,
+ id1->encoding);
+ std::string normalized_id_4_after
+ = normalize_id_sbc( id4_after,
+ id4_after_o,
+ id4_after_s,
+ id1->encoding);
+ the_alpha_and_omega_sbc(normalized_id_4_before,
+ normalized_id_4_after,
+ normalized_id_1,
+ next_comparand.alpha,
+ next_comparand.omega);
+
+ comparands.push_back(next_comparand);
+ break;
+ }
+
+ default:
+ {
+ // We have some number of identifer-3 values,
+ // each with possible PHRASE1 modifiers.
+ size_t pair_count = integers[int_index++];
+
+ // We need to build up pair_count comparand structures:
+
+ for(size_t k=0; k<pair_count; k++)
+ {
+ comparand_sbc next_comparand = {};
+ next_comparand.id_2_index = i;
+ next_comparand.operation = operation;
+
+ const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
+ size_t id3_o = __gg__treeplet_1o[cblc_index];
+ size_t id3_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
+ size_t id4_before_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_before_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
+ size_t id4_after_o = __gg__treeplet_1o[cblc_index];
+ size_t id4_after_s = __gg__treeplet_1s[cblc_index];
+ cblc_index += 1;
+
+ next_comparand.identifier_3 = normalize_id_sbc(id3,
+ id3_o,
+ id3_s,
+ id1->encoding);
+ std::string normalized_id_4_before
+ = normalize_id_sbc( id4_before,
+ id4_before_o,
+ id4_before_s,
+ id1->encoding);
+ std::string normalized_id_4_after
+ = normalize_id_sbc( id4_after,
+ id4_after_o,
+ id4_after_s,
+ id1->encoding);
+ the_alpha_and_omega_sbc(normalized_id_4_before,
+ normalized_id_4_after,
+ normalized_id_1,
+ next_comparand.alpha,
+ next_comparand.omega);
+ next_comparand.leading = true;
+ next_comparand.leading_count = 0;
+ comparands.push_back(next_comparand);
+ }
+ }
+ }
+ }
+ }
+
+ // We are now ready to walk through identifier-1, character by
+ // character, checking each of the comparands for a match:
+
+ // We are now set up to accomplish the data flow described
+ // in the language specification. We loop through the
+ // the character positions in normalized_id_1:
+ size_t leftmost = 0;
+ size_t rightmost = leftmost + normalized_id_1.length();
+
+ while( leftmost < rightmost )
+ {
+ // For each leftmost position, we check each of the
+ // pairs:
+
+ for(size_t k=0; k<comparands.size(); k++)
+ {
+ if( leftmost < comparands[k].alpha )
+ {
+ // This can't be a match, because leftmost is
+ // to the left of the comparand's alpha.
+ continue;
+ }
+ if( leftmost + comparands[k].identifier_3.length() > comparands[k].omega)
+ {
+ // This can't be a match, because the rightmost
+ // character of the comparand falls to the right
+ // of the comparand's omega
+ continue;
+ }
+ // A match is theoretically possible, because all
+ // the characters of the comparand fall between
+ // alpha and omega:
+ bool possible_match = true;
+
+ if( comparands[k].operation != bound_characters_e )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length(); m++)
+ {
+ if( comparands[k].identifier_3[m] != normalized_id_1[leftmost+m] )
+ {
+ possible_match = false;
+ break;
+ }
+ }
+ }
+ if( possible_match )
+ {
+ // The characters of the comparand match the
+ // characters at leftmost.
+ bool match = false;
+ switch( comparands[k].operation )
+ {
+ case bound_first_e:
+ // This can't happen in a FORMAT_1
+ warnx("The compiler goofed: "
+ "INSPECT FORMAT 1 "
+ "shouldn't have "
+ "bound_first_e");
+ abort();
+ break;
+
+ case bound_characters_e:
+ match = true;
+ break;
+
+ case bound_all_e:
+ {
+ // We have a match.
+ match = true;
+ break;
+ }
+
+ case bound_leading_e:
+ {
+ // We have a match at leftmost. But we need to figure out if this
+ // particular match is valid for LEADING.
+
+ // Hang onto your hat. This is delightfully clever.
+ //
+ // This position is LEADING if:
+ // 1) .leading is still true
+ // 2) leftmost / (length_of_comparand ) = current_count
+ //
+ // I get chills every time I look at that.
+
+ if( comparands[k].leading )
+ {
+ // So far, so good.
+ size_t count = ((leftmost - comparands[k].alpha))
+ / comparands[k].identifier_3.length();
+ if( count == comparands[k].leading_count )
+ {
+ // This means that the match here is just the latest of a
+ // string of LEADING matches that started at .alpha
+ comparands[k].leading_count += 1;
+ match = true;
+ }
+ }
+ break;
+ }
+
+ case bound_trailing_e:
+ {
+ // We have a match at leftmost.
+ //
+ // We want to know if this is a trailing match. For that to be,
+ // all of the possible matches from here to the omega have to be
+ // true as well:
+
+ if( (comparands[k].omega-leftmost)
+ % comparands[k].identifier_3.length() == 0 )
+ {
+ // The remaining number of characters is correct for a match.
+ // Keep checking.
+
+ // Assume a match until we learn otherwise:
+ match = true;
+ size_t local_left = leftmost;
+ local_left += comparands[k].identifier_3.length();
+ while( match && local_left < comparands[k].omega )
+ {
+ for(size_t m=0; m<comparands[k].identifier_3.length(); m++)
+ {
+ if( comparands[k].identifier_3[m]
+ != normalized_id_1[local_left+m] )
+ {
+ // We have a mismatched character, so no trailing match is
+ // possible
+ match = false;
+ break;
+ }
+ }
+ local_left += comparands[k].identifier_3.length();
+ }
+ }
+ break;
+ }
+ }
+
+ if( match )
+ {
+ // We have a match at leftmost:
+
+ // Bump the result counter
+ id_2_results[comparands[k].id_2_index].result += 1;
+
+ // Adjust the leftmost pointer to point to
+ // the rightmost character of the matched
+ // string, keeping in mind that it will be
+ // bumped again after we break out of the
+ // k<pair_count loop:
+ leftmost += comparands[k].identifier_3.length() - 1;
+ break;
+ }
+ }
+ else
+ {
+ // We are within alpha/omega, but there was no
+ // match, which permanently disqualifies the
+ // possibility of LEADING
+ comparands[k].leading = false;
+ }
+ }
+ leftmost += 1;
+ }
+
+ // Add our results to the identifier_2 values:
+
+ for(size_t i = 0; i<id_2_results.size(); i++)
+ {
+ int rdigits;
+ __int128 id_2_value
+ = __gg__binary_value_from_qualified_field(&rdigits,
+ id_2_results[i].id2,
+ id_2_results[i].id2_o,
+ id_2_results[i].id2_s);
+ while(rdigits--)
+ {
+ id_2_value /= 10.0;
+ }
+
+ // Accumulate what we've found into it
+ id_2_value += id_2_results[i].result;
+
+ // And put it back:
+ __gg__int128_to_qualified_field(id_2_results[i].id2,
+ id_2_results[i].id2_o,
+ id_2_results[i].id2_s,
+ id_2_value,
+ 0,
+ truncation_e,
+ NULL);
+ }
+ }
int __gg__rdigits = 0 ;
int __gg__nop = 0 ;
int __gg__main_called = 0 ;
-void *__gg__entry_label = NULL ;
+size_t __gg__entry_index = 0 ;
// During SORT operations, we don't want the end-of-file condition, which
// happens as a matter of course, from setting the EOF exception condition.
free(prior_winner);
}
-typedef std::vector<cbl_char_t>::const_iterator char_it_c ;
-typedef std::vector<cbl_char_t>::iterator char_it ;
-
-static const char *
-funky_find( const char *piece,
- const char *piece_end,
- const char *whole,
- const char *whole_end )
- {
- const char *retval = NULL;
-
- size_t length_of_piece = piece_end - piece;
- if(length_of_piece == 0)
- {
- __gg__abort("funky_find() length_of_piece shouldn't be zero");
- }
-
- whole_end -= length_of_piece;
-
- while( whole <= whole_end )
- {
- if( memcmp( piece, whole, length_of_piece) == 0 )
- {
- retval = whole;
- break;
- }
- whole += 1;
- }
- return retval;
- }
-
-static char_it_c
-funky_find_wide( char_it_c needle,
- char_it_c needle_end, // Actually end+1
- char_it_c haystack,
- char_it_c haystack_end, // Actually end+1
- char_it_c notfound)
- {
- // We are looking for the needle in the haystack
-
- char_it_c retval = notfound;
-
- size_t length_of_piece = needle_end - needle;
- if(length_of_piece == 0)
- {
- __gg__abort("funky_find_wide() length_of_piece shouldn't be zero");
- }
-
- haystack_end -= length_of_piece;
-
- while( haystack <= haystack_end )
- {
- // Compare the memory at needle to the memory at haystack
- if( memcmp( &(*needle),
- &(*haystack),
- length_of_piece*sizeof(cbl_char_t)) == 0 )
- {
- // They are the same; return where needle was found
- retval = haystack;
- break;
- }
- // Not found; move to the next location in the haystach
- haystack += 1;
- }
- return retval;
- }
-
-static const char *
-funky_find_backward(const char *piece,
- const char *piece_end,
- const char *whole,
- const char *whole_end )
- {
- const char *retval = NULL;
-
- size_t length_of_piece = piece_end - piece;
- if(length_of_piece == 0)
- {
- __gg__abort("funky_find_backward() length_of_piece shouldn't be zero");
- }
-
- whole_end -= length_of_piece;
-
- while( whole <= whole_end )
- {
- if( memcmp( piece, whole_end, length_of_piece) == 0 )
- {
- retval = whole_end;
- break;
- }
- whole_end -= 1;
- }
- return retval;
- }
-
-static char_it_c
-funky_find_wide_backward( char_it_c needle,
- char_it_c needle_end, // Actually end+1
- char_it_c haystack,
- char_it_c haystack_end, // Actually end+1
- char_it_c notfound)
- {
- // We are looking for the needle in the haystack
-
- char_it_c retval = notfound;
-
- size_t length_of_piece = needle_end - needle;
- if(length_of_piece == 0)
- {
- __gg__abort("funky_find_wide_backward() length_of_piece shouldn't be zero");
- }
-
- haystack_end -= length_of_piece;
-
- while( haystack <= haystack_end )
- {
- if( memcmp( &(*needle),
- &(*haystack_end),
- length_of_piece*sizeof(cbl_char_t)) == 0 )
- {
- // They are the same; return where needle was found
- retval = haystack_end;
- break;
- }
- // Not found; move to the next location in the haystack
- haystack_end -= 1;
- }
- return retval;
- }
-
-typedef struct normalized_operand
- {
- // These are the characters of the string. When the field is NumericDisplay
- // any leading or trailing +/- characters are removed, and any embedded
- // minus bits are removed.
-
- // In order for INSPECT to handle things like UTF-8, which often has
- // multi-byte codepoints, and UTF-16, which sometimes has multi-pair
- // codepoints we are going to convert everything to UTF-32 for internal
- // calculations and searches.
- std::string the_characters;
- std::vector<cbl_char_t>the_vectorxxxx;
-
- // offset and length are maintained in characters, not bytes
- size_t offset; // Usually zero. Increased by one for leading separate sign.
- size_t length; // Usually the same as the original. But it is one less
- // // than the original when there is a trailing separate sign.
- } normalized_operand;
-
-typedef struct comparand
- {
- size_t id_2_index;
- cbl_inspect_bound_t operation;
- normalized_operand identifier_3; // The thing to be found
- normalized_operand identifier_5; // The replacement, for FORMAT 2
- const char *alpha; // The start location within normalized_id_1
- const char *omega; // The end+1 location within normalized_id_1
- char_it_c alpha_it; // The start location within normalized_id_1
- char_it_c omega_it; // The end+1 location within normalized_id_1
- size_t leading_count;
- bool leading;
- bool first;
- } comparand;
-
-typedef struct id_2_result
- {
- cblc_field_t *id2;
- size_t id2_o;
- size_t id2_s;
- size_t result;
- } id_2_result;
-
-static normalized_operand
-normalize_id( const cblc_field_t *field,
- size_t field_o,
- size_t field_s,
- cbl_encoding_t encoding )
- {
- normalized_operand retval;
-
- if( field )
- {
- charmap_t *charmap = __gg__get_charmap(encoding);
-
- // This is the old-style byte-based assumption
- const unsigned char *data = field->data + field_o;
- cbl_figconst_t figconst
- = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
-
- retval.offset = 0;
- retval.length = field_s;
-
- if( field->type == FldNumericDisplay )
- {
- // The value is NumericDisplay.
- if( field->attr & separate_e )
- {
- // Because the sign is a separate plus or minus, the length
- // gets reduced by one:
- retval.length = field_s - 1;
- if( field->attr & leading_e )
- {
- // Because the sign character is LEADING, we increase the
- // offset by one
- retval.offset = 1;
- }
- }
- for( size_t i=retval.offset; i<retval.length; i+=1 )
- {
- // Because we are dealing with a NumericDisplay that might have
- // the minus bit turned on, we will to mask it off as we copy the
- // input characters over to retval:
- retval.the_characters += charmap->set_digit_negative(data[i], false);
- }
- }
- else
- {
- // We are set up to create the_characters;
- if( figconst == normal_value_e )
- {
- for( size_t i=retval.offset; i<retval.length; i+=1 )
- {
- retval.the_characters += data[i];
- }
- }
- else
- {
- char ch = charmap->figconst_character(figconst);
- for( size_t i=retval.offset; i<retval.length; i+=1 )
- {
- retval.the_characters += ch;
- }
- }
- }
- }
- else
- {
- // There is no field, so leave the_characters empty.
- retval.offset = 0;
- retval.length = 0;
- }
-
- if( field )
- {
- cbl_encoding_t source_encoding = field->encoding;
- const charmap_t *charmap_source = __gg__get_charmap(source_encoding);
- charmap_t *charmap = __gg__get_charmap(encoding);
- int stride = charmap->stride();
-
- const unsigned char *data = field->data + field_o;
- cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
- if( figconst == normal_value_e )
- {
- retval.offset = 0;
- retval.length = field_s / stride;
-
- if( field->type == FldNumericDisplay )
- {
- // The value is NumericDisplay, so we might need to adjust the offset
- // and length:
- if( field->attr & separate_e )
- {
- // Because the sign is a separate plus or minus, the length
- // gets reduced by one:
- retval.length = field_s - 1;
- if( field->attr & leading_e )
- {
- // Because the sign character is LEADING, we increase the
- // offset by one
- retval.offset = 1;
- }
- }
- }
- // We are ready to convert from the input to UTF32
- size_t converted_characters;
- const char *converted = __gg__iconverter(source_encoding,
- DEFAULT_32_ENCODING,
- data+retval.offset * stride,
- retval.length * stride,
- &converted_characters);
- // We are ready to copy the characters over:
- for( size_t i=0; i<converted_characters; i+=width_of_utf32 )
- {
- // Because we are dealing with a NumericDisplay that might have
- // the minus bit turned on, we will to mask it off as we copy the
- // input characters over to retval:
- cbl_char_t ch = charmap->getch(converted, i);
- if( field->type == FldNumericDisplay )
- {
- if( charmap_source->is_like_ebcdic() )
- {
- // In EBCDIC, a flagged negative digit 0xF0 through 0xF9 becomes
- // 0xD0 through 0xD9. Those represent the characters
- // "}JKLMNOPQR", which, now that we are in UTF32 space, don't have
- // the right bit pattern to be fixed with set_digit_negative().
- // So, we fix it separately with this table: Note that location
- // 0x7D, which is ASCII '{', becomes 0x30 '0'. See also that
- // locations 0x4A through 0x52 become 0x31 through 0x39.
- static const uint8_t fixit[256] =
- {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x81, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x82, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x83, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
- 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x84, 0x49, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
- 0x37, 0x38, 0x39, 0x53, 0x54, 0x55, 0x56, 0x57, 0x85, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
- 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x86, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x87, 0x79, 0x7a, 0x7b, 0x7c, 0x30, 0x7e, 0x7f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
- 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x89, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
- 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0x8a, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0x8b, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
- 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0x8c, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
- 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0x8d, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0x8e, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x8f, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
- };
- ch = fixit[ch & 0xFF];
- }
- else
- {
- ch = charmap->set_digit_negative(ch, false);
- }
- }
- retval.the_vectorxxxx.push_back(ch);
- }
- }
- else
- {
- // We need to fill the field with a figurative constant:
- // We are set up to create the_characters;
- charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
- char ch = charmap32->figconst_character(figconst);
- for( size_t i=retval.offset; i<retval.length; i+=1 )
- {
- retval.the_characters += ch;
- retval.the_vectorxxxx.push_back(ch);
- }
- }
- }
- else
- {
- // There is no field, so leave the_characters empty.
- retval.offset = 0;
- retval.length = 0;
- }
-
- return retval;
- }
-
-static void
-match_lengths( normalized_operand &id_target,
- const normalized_operand &id_source)
- {
- // This routine gets called when id_source is a figurative constant and
- // we need the target to be the same length as the source
-
- char ch = id_target.the_characters[0];
- id_target.the_characters.clear();
- for(size_t i=0; i<id_source.length; i++)
- {
- id_target.the_characters += ch;
- }
-
- cbl_char_t wch = id_target.the_vectorxxxx[0];
- id_target.the_vectorxxxx.clear();
- for(size_t i=0; i<id_source.length; i++)
- {
- id_target.the_vectorxxxx.push_back(wch);
- }
- id_target.length = id_source.length;
- }
-
-static void
-the_alpha_and_omega(const normalized_operand &id_before,
- const normalized_operand &id_after,
- const char * &alpha,
- const char * &omega,
- char_it_c &alpha_it,
- char_it_c &omega_it,
- char_it_c notfound)
- {
- /* The 2023 ISO description of the AFTER and BEFORE phrases of the INSPECT
- statement is, in a word, garbled.
-
- IBM's COBOL for Linux 1.2 is a little better, but still a bit confusing
- because the description for AFTER neglects to specifically state that
- the scan starts one character to the right of the *first* occurrence of
- the AFTER value.
-
- Micro Focus 9.2.5 has the advantage of being ungarbled, succinct, and
- unambiguous.
-
- The BEFORE phrase modifies the character position to use as the rightmost
- position in source for the corresponding comparison operation. Comparisons
- in source occur only to the left of the first occurrence of delimiter. If
- delimiter is not present in source, then the comparison proceeds as if
- there were no BEFORE phrase.
-
- The AFTER phrase modifies the character position to use as the leftmost
- position in source for the corresponding comparison operation. Comparisons
- in source occur only to the right of the first occurrence of delimiter.
- This character position is the one immediately to the right of the
- rightmost character of the delimiter found. If delimiter is not found in
- source, the INSPECT statement has no effect (no tallying or replacement
- occurs).
-
- "xyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
- ^ ^
- | |
- | |-- omega
- ----------------alpha
- */
-
- if( id_before.length )
- {
- // This is the BEFORE delimiter. We look for the first occurrence of that
- // delimiter starting at the left of id_1
-
- const char *start = id_before.the_characters.c_str();
- const char *end = start + id_before.length;
- const char *found = funky_find(start, end, alpha, omega);
- if( found )
- {
- // We found id_before within alpha/omega, so reduce omega
- // to the found location.
- omega = found;
- // If not found, we just leave omega alone.
- }
-
- char_it_c omega_found = funky_find_wide(id_before.the_vectorxxxx.begin(),
- id_before.the_vectorxxxx.end(),
- alpha_it,
- omega_it,
- notfound );
- if( omega_found != notfound )
- {
- // We found id_before within alpha/omega, so reduce omega
- // to the found location.
- omega_it = omega_found;
- }
- }
-
- if( id_after.length )
- {
- // This is the AFTER delimiter. We look for the first occurrence of that
- // delimiter in id_1
-
- const char *start = id_after.the_characters.c_str();
- const char *end = start + id_after.length;
- const char *found = funky_find(start, end, alpha, omega);
- if( found )
- {
- // We found id_after in the alpha/omega segment. We update alpha
- // be the character after the id_after substring.
- alpha = found + (end-start);
- }
- else
- {
- // We didn't find the id_after string, so we set the alpha to be
- // omega. That means that no tally or replace operation will take
- // because no characters will qualify.
- alpha = omega;
- }
-
- char_it_c omega_found = funky_find_wide(id_after.the_vectorxxxx.begin(),
- id_after.the_vectorxxxx.end(),
- alpha_it,
- omega_it,
- notfound );
- if( omega_found != notfound)
- {
- // We found id_after in the alpha/omega segment. We update alpha
- // be the character after the id_after substring.
- alpha_it = omega_found + (end-start);
- }
- else
- {
- // We didn't find the id_after string, so we set the alpha to be
- // omega. That means that no tally or replace operation will take
- // because no characters will qualify.
- alpha_it = omega_it;
- }
- }
-
- }
-
-static void
-the_alpha_and_omega_backward( const normalized_operand &id_before,
- const normalized_operand &id_after,
- const char * &alpha,
- const char * &omega,
- char_it_c &alpha_it,
- char_it_c &omega_it,
- char_it_c notfound)
- {
- /* Like the_alpha_and_omega(), but for handling BACKWARD.
-
- "xyzxyzBEFORExyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
- ^ ^
- | |
- | -- omega
- |--------alpha
- */
-
- const char *id_1 = alpha;
- const char *id_1_end = omega;
-
- if( id_before.length )
- {
- // This is the BEFORE delimiter. We look for the first occurrence of it
- // from the right end of id_1
-
- const char *start = id_before.the_characters.c_str();
- const char *end = start + id_before.length;
- const char *found = funky_find_backward(start, end, id_1, id_1_end);
- if( found )
- {
- // We found id_before within id_1, so change alpha to the character just
- // to the right of BEFORE. Otherwise, we will leave alpha alone, so that
- // it stays at the beginning of id_1. That's because if you can't find
- // id_before, it's as if there were no BEFORE phrase.
- alpha = found + id_before.length;
- }
-
- char_it_c omega_found = funky_find_wide_backward(id_before.the_vectorxxxx.begin(),
- id_before.the_vectorxxxx.end(),
- alpha_it,
- omega_it,
- notfound );
- if( omega_found != notfound )
- {
- // We found id_before within id_1, so change alpha to the character just
- // to the right of BEFORE. Otherwise, we will leave alpha alone, so that
- // it stays at the beginning of id_1
- alpha_it = omega_found + id_before.length;
- }
- }
-
- if( id_after.length )
- {
- // This is the AFTER delimiter. We look for the first occurrence in id_1
-
- const char *start = id_after.the_characters.c_str();
- const char *end = start + id_after.length;
- const char *found = funky_find_backward(start, end, alpha, omega);
- if( found )
- {
- // We found id_after in id_1. We update omega to be
- // at that location.
- omega = found;
- }
- else
- {
- // If the AFTER isn't found, we need to adjust things so that nothing
- // happens.
- omega = alpha;
- }
-
- char_it_c omega_found = funky_find_wide_backward(id_after.the_vectorxxxx.begin(),
- id_after.the_vectorxxxx.end(),
- alpha_it,
- omega_it,
- notfound );
- if( omega_found != notfound)
- {
- // We found id_after in id_1. We update omega to be
- // at that location.
- omega_it = omega_found;
- }
- else
- {
- // If the AFTER isn't found, we need to adjust things so that nothing
- // happens.
- omega_it = alpha_it;
- }
- }
- }
-
-static
-void
-inspect_backward_format_1(const size_t integers[])
- {
- size_t int_index = 0;
- size_t cblc_index = 0;
-
- // Reference the language specification for the meanings of identifier_X
-
- // Pick up the number of identifier_2 loops in this INSPECT statement
- size_t n_identifier_2 = integers[int_index++];
-
- std::vector<id_2_result> id_2_results(n_identifier_2);
-
- // Pick up identifier_1, which is the string being inspected
- const cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
- size_t id1_o = __gg__treeplet_1o[cblc_index];
- size_t id1_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
- // normalize it, according to the language specification.
- normalized_operand normalized_id_1 = normalize_id(id1, id1_o, id1_s, id1->encoding);
-
- std::vector<comparand> comparands;
-
- for(size_t i=0; i<n_identifier_2; i++)
- {
- // For each identifier_2, we pick up its value:
-
- id_2_results[i].id2 = __gg__treeplet_1f [cblc_index];
- id_2_results[i].id2_o = __gg__treeplet_1o[cblc_index];
- id_2_results[i].id2_s = __gg__treeplet_1s[cblc_index];
-
- cblc_index += 1;
- id_2_results[i].result = 0;
-
- // For each identifier 2, there is a count of operations:
- size_t nbounds = integers[int_index++];
-
- for(size_t j=0; j<nbounds; j++ )
- {
- // each operation has a bound code:
- cbl_inspect_bound_t operation
- = (cbl_inspect_bound_t)integers[int_index++];
- switch( operation )
- {
- case bound_characters_e:
- {
- // We are counting characters. There is no identifier-3,
- // but we we hard-code the length to one to represent a
- // single character.
- comparand next_comparand = {};
- next_comparand.id_2_index = i;
- next_comparand.operation = operation;
- next_comparand.identifier_3.length = 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f [cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
-
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- next_comparand.alpha
- = normalized_id_1.the_characters.c_str();
-
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega_backward(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
- comparands.push_back(next_comparand);
- break;
- }
- default:
- {
- // We have some number of identifer-3 values,
- // each with possible PHRASE1 modifiers.
- size_t pair_count = integers[int_index++];
-
- // We need to build up pair_count comparand structures:
-
- for(size_t k=0; k<pair_count; k++)
- {
- comparand next_comparand = {};
- next_comparand.id_2_index = i;
- next_comparand.operation = operation;
-
- const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
- size_t id3_o = __gg__treeplet_1o[cblc_index];
- size_t id3_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- next_comparand.identifier_3
- = normalize_id(id3, id3_o, id3_s, id1->encoding);
-
- next_comparand.alpha
- = normalized_id_1.the_characters.c_str();
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
-
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega_backward(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
- next_comparand.leading = true;
- next_comparand.leading_count = 0;
- comparands.push_back(next_comparand);
- }
- }
- }
- }
- }
-
- // We are now ready to walk through identifier-1, character by
- // character, checking each of the comparands for a match:
-
- // We are now set up to accomplish the data flow described
- // in the language specification. We loop through the
- // the character positions in normalized_id_1:
- char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
- char_it_c rightmost = leftmost + normalized_id_1.length;
- char_it_c the_end_of_the_world = rightmost;
-
- while( leftmost < rightmost )
- {
- size_t rightmost_delta = 0;
- rightmost -= 1;
- // We look at the rightmost position. If that position is within the
- // alpha-to-omega qualified range, we check all possible matches:
-
- for(size_t k=0; k<comparands.size(); k++)
- {
- if( rightmost < comparands[k].alpha_it )
- {
- // This can't be a match, because rightmost is
- // to the left of the comparand's alpha.
- continue;
- }
- if( rightmost + comparands[k].identifier_3.length >
- comparands[k].omega_it )
- {
- // This can't be a match, because the rightmost
- // character of the comparand falls to the right
- // of the comparand's omega
- continue;
- }
- if( rightmost + comparands[k].identifier_3.length >
- the_end_of_the_world )
- {
- // This can't be a match, because the rightmost character of the
- // comparand falls past the new edge of id_1 established by a prior
- // match.
- continue;
- }
- // A match is theoretically possible, because all
- // the characters of the comparand fall between
- // alpha and omega:
- bool possible_match = true;
-
- if( comparands[k].operation != bound_characters_e )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
- {
- possible_match = false;
- break;
- }
- }
- }
- if( possible_match )
- {
- // The characters of the comparand match the
- // characters at rightmost.
- bool match = false;
- switch( comparands[k].operation )
- {
- case bound_first_e:
- // This can't happen in a FORMAT_1
- warnx("The compiler goofed: "
- "INSPECT FORMAT 1 "
- "shouldn't have "
- "bound_first_e");
- abort();
- break;
-
- case bound_characters_e:
- match = 1;
- break;
-
- case bound_all_e:
- {
- // We have a match.
- match = true;
- break;
- }
-
- case bound_leading_e:
- {
- // We have a match at rightmost. But we need to figure out if this
- // particular match is valid for LEADING.
-
- if( comparands[k].leading )
- {
- if( rightmost + comparands[k].identifier_3.length
- == comparands[k].omega_it)
- {
- // This means that the match here is just the latest of a
- // string of LEADING matches that started at .omega
- comparands[k].leading_count += 1;
- match = true;
- comparands[k].omega_it -= comparands[k].identifier_3.length;
- the_end_of_the_world = rightmost;
- rightmost_delta = comparands[k].identifier_3.length-1;
- }
- }
- break;
- }
-
- case bound_trailing_e:
- {
- // We have a match at rightmost.
- //
- // We want to know if this is a trailing match. For that to be,
- // all of the possible matches from here leftward to the alpha have
- // to be true as well:
-
- if( (rightmost - comparands[k].alpha_it )
- % comparands[k].identifier_3.length == 0 )
- {
- // The remaining number of characters is correct for a match.
- // Keep checking.
-
- // Assume a match until we learn otherwise:
- match = true;
- char_it_c local_left = rightmost;
- local_left -= comparands[k].identifier_3.length;
- while( local_left >= comparands[k].alpha_it )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m]
- != local_left[m] )
- {
- // We have a mismatched character, so no trailing match is
- // possible
- match = false;
- break;
- }
- }
- local_left -= comparands[k].identifier_3.length;
- }
- }
- break;
- }
- }
-
- if( match )
- {
- // We have a match at rightmost:
- // Bump the result counter
- id_2_results[comparands[k].id_2_index].result += 1;
-
- // We have a match here at rightmost, so we need to set the end of
- // the world here
- the_end_of_the_world = rightmost;
-
- // Adjust rightmost by the additional characters in a BACKWARD
- // LEADING search:
- rightmost -= rightmost_delta;
- break;
- }
- }
- else
- {
- // We are within alpha/omega, but there was no
- // match, which permanently disqualifies the
- // possibility of LEADING
- comparands[k].leading = false;
- }
- }
- }
-
- // Add our results to the identifier_2 values:
-
- for(size_t i = 0; i<id_2_results.size(); i++)
- {
- int rdigits;
- __int128 id_2_value
- = __gg__binary_value_from_qualified_field(&rdigits,
- id_2_results[i].id2,
- id_2_results[i].id2_o,
- id_2_results[i].id2_s);
- while(rdigits--)
- {
- id_2_value /= 10.0;
- }
-
- // Accumulate what we've found into it
- id_2_value += id_2_results[i].result;
-
- // And put it back:
- __gg__int128_to_qualified_field(id_2_results[i].id2,
- id_2_results[i].id2_o,
- id_2_results[i].id2_s,
- id_2_value,
- 0,
- truncation_e,
- NULL);
- }
- }
-
-extern "C"
-void
-__gg__inspect_format_1(int backward, size_t integers[])
- {
- if( backward )
- {
- return inspect_backward_format_1(integers);
- }
-
- size_t int_index = 0;
- size_t cblc_index = 0;
-
- // Reference the language specification for the meanings of identifier_X
-
- // Pick up the number of identifier_2 loops in this INSPECT statement
- size_t n_identifier_2 = integers[int_index++];
-
- std::vector<id_2_result> id_2_results(n_identifier_2);
-
- // Pick up identifier_1, which is the string being inspected
- const cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
- size_t id1_o = __gg__treeplet_1o[cblc_index];
- size_t id1_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
- // normalize it, according to the language specification.
- normalized_operand normalized_id_1
- = normalize_id(id1, id1_o, id1_s, id1->encoding);
-
- std::vector<comparand> comparands;
-
- for(size_t i=0; i<n_identifier_2; i++)
- {
- // For each identifier_2, we pick up its value:
-
- id_2_results[i].id2 = __gg__treeplet_1f [cblc_index];
- id_2_results[i].id2_o = __gg__treeplet_1o[cblc_index];
- id_2_results[i].id2_s = __gg__treeplet_1s[cblc_index];
-
- cblc_index += 1;
- id_2_results[i].result = 0;
-
- // For each identifier 2, there is a count of operations:
- size_t nbounds = integers[int_index++];
-
- for(size_t j=0; j<nbounds; j++ )
- {
- // each operation has a bound code:
- cbl_inspect_bound_t operation
- = (cbl_inspect_bound_t)integers[int_index++];
- switch( operation )
- {
- case bound_characters_e:
- {
- // We are counting characters. There is no identifier-3,
- // but we we hard-code the length to one to represent a
- // single character.
- comparand next_comparand = {};
- next_comparand.id_2_index = i;
- next_comparand.operation = operation;
- next_comparand.identifier_3.length = 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f [cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
-
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- next_comparand.alpha
- = normalized_id_1.the_characters.c_str();
-
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
- comparands.push_back(next_comparand);
- break;
- }
- default:
- {
- // We have some number of identifer-3 values,
- // each with possible PHRASE1 modifiers.
- size_t pair_count = integers[int_index++];
-
- // We need to build up pair_count comparand structures:
-
- for(size_t k=0; k<pair_count; k++)
- {
- comparand next_comparand = {};
- next_comparand.id_2_index = i;
- next_comparand.operation = operation;
-
- const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
- size_t id3_o = __gg__treeplet_1o[cblc_index];
- size_t id3_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- next_comparand.identifier_3
- = normalize_id(id3,
- id3_o,
- id3_s,
- id1->encoding);
-
- next_comparand.alpha
- = normalized_id_1.the_characters.c_str();
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
-
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- the_alpha_and_omega(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
- next_comparand.leading = true;
- next_comparand.leading_count = 0;
- comparands.push_back(next_comparand);
- }
- }
- }
- }
- }
-
- // We are now ready to walk through identifier-1, character by
- // character, checking each of the comparands for a match:
-
- // We are now set up to accomplish the data flow described
- // in the language specification. We loop through the
- // the character positions in normalized_id_1:
- char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
- char_it_c rightmost = leftmost + normalized_id_1.length;
-
- while( leftmost < rightmost )
- {
- // For each leftmost position, we check each of the
- // pairs:
-
- for(size_t k=0; k<comparands.size(); k++)
- {
- if( leftmost < comparands[k].alpha_it )
- {
- // This can't be a match, because leftmost is
- // to the left of the comparand's alpha.
- continue;
- }
- if( leftmost + comparands[k].identifier_3.length > comparands[k].omega_it )
- {
- // This can't be a match, because the rightmost
- // character of the comparand falls to the right
- // of the comparand's omega
- continue;
- }
- // A match is theoretically possible, because all
- // the characters of the comparand fall between
- // alpha and omega:
- bool possible_match = true;
-
- if( comparands[k].operation != bound_characters_e )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m] != leftmost[m] )
- {
- possible_match = false;
- break;
- }
- }
- }
- if( possible_match )
- {
- // The characters of the comparand match the
- // characters at leftmost.
- bool match = false;
- switch( comparands[k].operation )
- {
- case bound_first_e:
- // This can't happen in a FORMAT_1
- warnx("The compiler goofed: "
- "INSPECT FORMAT 1 "
- "shouldn't have "
- "bound_first_e");
- abort();
- break;
-
- case bound_characters_e:
- match = true;
- break;
-
- case bound_all_e:
- {
- // We have a match.
- match = true;
- break;
- }
-
- case bound_leading_e:
- {
- // We have a match at leftmost. But we need to figure out if this
- // particular match is valid for LEADING.
-
- // Hang onto your hat. This is delightfully clever.
- //
- // This position is LEADING if:
- // 1) .leading is still true
- // 2) leftmost / (length_of_comparand ) = current_count
- //
- // I get chills every time I look at that.
-
- if( comparands[k].leading )
- {
- // So far, so good.
- size_t count = ((leftmost - comparands[k].alpha_it))
- / comparands[k].identifier_3.length;
- if( count == comparands[k].leading_count )
- {
- // This means that the match here is just the latest of a
- // string of LEADING matches that started at .alpha
- comparands[k].leading_count += 1;
- match = true;
- }
- }
- break;
- }
-
- case bound_trailing_e:
- {
- // We have a match at leftmost.
- //
- // We want to know if this is a trailing match. For that to be,
- // all of the possible matches from here to the omega have to be
- // true as well:
-
- if( (comparands[k].omega_it-leftmost)
- % comparands[k].identifier_3.length == 0 )
- {
- // The remaining number of characters is correct for a match.
- // Keep checking.
-
- // Assume a match until we learn otherwise:
- match = true;
- char_it_c local_left = leftmost;
- local_left += comparands[k].identifier_3.length;
- while( match && local_left < comparands[k].omega_it )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m]
- != local_left[m] )
- {
- // We have a mismatched character, so no trailing match is
- // possible
- match = false;
- break;
- }
- }
- local_left += comparands[k].identifier_3.length;
- }
- }
- break;
- }
- }
-
- if( match )
- {
- // We have a match at leftmost:
-
- // Bump the result counter
- id_2_results[comparands[k].id_2_index].result += 1;
-
- // Adjust the leftmost pointer to point to
- // the rightmost character of the matched
- // string, keeping in mind that it will be
- // bumped again after we break out of the
- // k<pair_count loop:
- leftmost += comparands[k].identifier_3.length - 1;
- break;
- }
- }
- else
- {
- // We are within alpha/omega, but there was no
- // match, which permanently disqualifies the
- // possibility of LEADING
- comparands[k].leading = false;
- }
- }
- leftmost += 1;
- }
-
- // Add our results to the identifier_2 values:
-
- for(size_t i = 0; i<id_2_results.size(); i++)
- {
- int rdigits;
- __int128 id_2_value
- = __gg__binary_value_from_qualified_field(&rdigits,
- id_2_results[i].id2,
- id_2_results[i].id2_o,
- id_2_results[i].id2_s);
- while(rdigits--)
- {
- id_2_value /= 10.0;
- }
-
- // Accumulate what we've found into it
- id_2_value += id_2_results[i].result;
-
- // And put it back:
- __gg__int128_to_qualified_field(id_2_results[i].id2,
- id_2_results[i].id2_o,
- id_2_results[i].id2_s,
- id_2_value,
- 0,
- truncation_e,
- NULL);
- }
- }
-
-static
-void
-inspect_backward_format_2(const size_t integers[])
- {
- size_t int_index = 0;
- size_t cblc_index = 0;
-
- // Reference the language specification for the meanings of identifier_X
-
- // Pick up identifier_1, which is the string being inspected
- cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
- size_t id1_o = __gg__treeplet_1o[cblc_index];
- size_t id1_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- // normalize it, according to the language specification.
- normalized_operand normalized_id_1
- = normalize_id(id1, id1_o, id1_s, id1->encoding);
-
- std::vector<comparand> comparands;
-
- // Pick up the count of operations:
- size_t nbounds = integers[int_index++];
-
- for(size_t j=0; j<nbounds; j++ )
- {
- // each operation has a bound code:
- cbl_inspect_bound_t operation = (cbl_inspect_bound_t)integers[int_index++];
- switch( operation )
- {
- case bound_characters_e:
- {
- comparand next_comparand = {};
- next_comparand.operation = operation;
-
- const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
- size_t id5_o = __gg__treeplet_1o[cblc_index];
- size_t id5_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- next_comparand.identifier_5
- = normalize_id(id5, id5_o, id5_s, id1->encoding);
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- // Because this is a CHARACTER operation, the lengths of
- // identifier-3 and identifier-5 should be one. Let's avoid the
- // chaos that will otherwise ensue should the lengths *not* be
- // one.
- next_comparand.identifier_3.length = 1;
- next_comparand.identifier_5.length = 1;
-
- next_comparand.alpha = normalized_id_1.the_characters.c_str();
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega_backward(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
-
- comparands.push_back(next_comparand);
- break;
- }
- default:
- {
- // We have some number of identifer-3/identifier-5 pairs,
- // each with possible PHRASE1 modifiers.
- size_t pair_count = integers[int_index++];
-
- for(size_t k=0; k<pair_count; k++)
- {
- comparand next_comparand = {};
- next_comparand.operation = operation;
-
- const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
- size_t id3_o = __gg__treeplet_1o[cblc_index];
- size_t id3_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
- size_t id5_o = __gg__treeplet_1o[cblc_index];
- size_t id5_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- next_comparand.identifier_3 = normalize_id(id3, id3_o, id3_s, id1->encoding);
- next_comparand.identifier_5 = normalize_id(id5, id5_o, id5_s, id1->encoding);
-
- // Identifiers 3 and 5 have to be the same length. But
- // but either, or both, can be figurative constants. If
- // they are figurative constants, they start off with a
- // length of one. We will expand figurative constants to
- // match the length of the other one:
-
- if( id3->attr & FIGCONST_MASK )
- {
- match_lengths( next_comparand.identifier_3,
- next_comparand.identifier_5);
- }
- else if( id5->attr & FIGCONST_MASK )
- {
- match_lengths( next_comparand.identifier_5,
- next_comparand.identifier_3);
- }
-
- next_comparand.alpha
- = normalized_id_1.the_characters.c_str();
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega_backward(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
- next_comparand.leading = true;
- next_comparand.leading_count = 0;
- next_comparand.first = true;
- comparands.push_back(next_comparand);
- }
- }
- }
- }
-
- // We can now look through normalized_id_1 and replace characters:
-
- char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
- char_it_c rightmost = leftmost + normalized_id_1.length;
- char_it_c the_end_of_the_world = rightmost;
-
- while( leftmost < rightmost )
- {
- size_t rightmost_delta = 0;
-
- rightmost -= 1;
- // We look at the rightmost position. If that position is within the
- // alpha-to-omega qualified range, we check all possible matches:
-
- for(size_t k=0; k<comparands.size(); k++)
- {
- if( rightmost < comparands[k].alpha_it )
- {
- // This can't be a match, because rightmost is
- // to the left of the comparand's alpha.
- continue;
- }
- if( rightmost + comparands[k].identifier_3.length > comparands[k].omega_it )
- {
- // This can't be a match, because the rightmost
- // character of the comparand falls to the right
- // of the comparand's omega
- continue;
- }
- if( rightmost + comparands[k].identifier_3.length > the_end_of_the_world )
- {
- // This can't be a match, because the rightmost character of the
- // comparand falls past the new edge of id_1 established by a prior
- // match.
- continue;
- }
- // A match is theoretically possible, because all
- // the characters of the comparand fall between
- // alpha and omega:
- bool possible_match = true;
-
- if( comparands[k].operation != bound_characters_e )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
- {
- possible_match = false;
- break;
- }
- }
- }
- if( possible_match )
- {
- // The characters of the comparand match the
- // characters at rightmost.
- bool match = false;
- switch( comparands[k].operation )
- {
- case bound_first_e:
- // This can't happen in a FORMAT_2
- warnx("The compiler goofed: "
- "INSPECT FORMAT 2 "
- "shouldn't have "
- "bound_first_e");
- abort();
- break;
-
- case bound_characters_e:
- match = 1;
- break;
-
- case bound_all_e:
- {
- // We have a match.
- match = true;
- break;
- }
-
- case bound_leading_e:
- {
- // We have a match at rightmost. But we need to figure out if this
- // particular match is valid for LEADING.
-
- if( comparands[k].leading )
- {
- if( rightmost
- + comparands[k].identifier_3.length * (comparands[k].leading_count +1)
- == comparands[k].omega_it)
- {
- // This means that the match here is just the latest of a
- // string of LEADING matches that started at .omega
- comparands[k].leading_count += 1;
- match = true;
- rightmost_delta = comparands[k].identifier_3.length-1;
- }
- }
- break;
- }
-
- case bound_trailing_e:
- {
- // We have a match at rightmost.
- //
- // We want to know if this is a trailing match. For that to be,
- // all of the possible matches from here leftward to the alpha have
- // to be true as well:
-
- if( (rightmost - comparands[k].alpha_it )
- % comparands[k].identifier_3.length == 0 )
- {
- // The remaining number of characters is correct for a match.
- // Keep checking.
-
- // Assume a match until we learn otherwise:
- match = true;
- char_it_c local_left = rightmost;
- local_left -= comparands[k].identifier_3.length;
- while( local_left >= comparands[k].alpha_it )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m]
- != local_left[m] )
- {
- // We have a mismatched character, so no trailing match is
- // possible
- match = false;
- break;
- }
- }
- local_left -= comparands[k].identifier_3.length;
- }
- }
- break;
- }
- }
-
- if( match )
- {
- // We have a match at rightmost. We need to
- // to replace the characters in normalized_id_1
- // with the characters from normalized_id_5
- //fprintf(stderr, "Rule: %ld %p %s\n", k+1, rightmost, rightmost);
-
- size_t index = rightmost - normalized_id_1.the_vectorxxxx.begin();
- for( size_t l = 0;
- l < comparands[k].identifier_5.length;
- l++ )
- {
- cbl_char_t ch = comparands[k].identifier_5.
- the_vectorxxxx[l];
- normalized_id_1.the_vectorxxxx[index++] = ch;
- }
-
- the_end_of_the_world = rightmost;
- rightmost -= rightmost_delta;
- break;
- }
- }
- else
- {
- comparands[k].leading = false;
- }
- }
- }
-
- // Here is where we take the characters from normalized_id_1 and put them
- // back into identifier_1.
-
- charmap_t *charmap = __gg__get_charmap(id1->encoding);
- // Wastefully prefill id_1 with spaces in case the processing resulted in a
- // string shorter than the original. (There is always the possiblity that
- // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
- // this before calling __gg__converter, because both mapped_character and
- // __gg__iconverter use the same static buffer.
- unsigned char *id1_data = id1->data + id1_o;
- charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
-
- // We've been working in UTF32; we convert back to the original id1 encoding.
- size_t bytes_converted;
- const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
- id1->encoding,
- normalized_id_1.the_vectorxxxx.data(),
- normalized_id_1.length*width_of_utf32,
- &bytes_converted) ;
- // And move those characters into place in id_1:
- memcpy(id1_data,
- converted,
- std::min(bytes_converted, id1_s));
-
- return;
- }
-
-extern "C"
-void
-__gg__inspect_format_2(int backward, size_t integers[])
- {
- if( backward )
- {
- return inspect_backward_format_2(integers);
- }
- size_t int_index = 0;
- size_t cblc_index = 0;
-
- // Reference the language specification for the meanings of identifier_X
-
- // Pick up identifier_1, which is the string being inspected
- cblc_field_t *id1 = __gg__treeplet_1f[cblc_index];
- size_t id1_o = __gg__treeplet_1o[cblc_index];
- size_t id1_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- // normalize it, according to the language specification.
- normalized_operand normalized_id_1
- = normalize_id(id1, id1_o, id1_s, id1->encoding);
-
- std::vector<comparand> comparands;
-
- // Pick up the count of operations:
- size_t nbounds = integers[int_index++];
-
- for(size_t j=0; j<nbounds; j++ )
- {
- // each operation has a bound code:
- cbl_inspect_bound_t operation
- = (cbl_inspect_bound_t)integers[int_index++];
- switch( operation )
- {
- case bound_characters_e:
- {
- comparand next_comparand = {} ;
- next_comparand.operation = operation;
-
- const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
- size_t id5_o = __gg__treeplet_1o[cblc_index];
- size_t id5_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f [cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- next_comparand.identifier_5
- = normalize_id(id5, id5_o, id5_s, id1->encoding);
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- // Because this is a CHARACTER operation, the lengths of
- // identifier-3 and identifier-5 should be one. Let's avoid the
- // chaos that will otherwise ensue should the lengths *not* be
- // one.
- next_comparand.identifier_3.length = 1;
- next_comparand.identifier_5.length = 1;
-
- next_comparand.alpha = normalized_id_1.the_characters.c_str();
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
- comparands.push_back(next_comparand);
- break;
- }
- default:
- {
- // We have some number of identifer-3/identifier-5 pairs,
- // each with possible PHRASE1 modifiers.
- size_t pair_count = integers[int_index++];
-
- for(size_t k=0; k<pair_count; k++)
- {
- comparand next_comparand = {};
- next_comparand.operation = operation;
-
- const cblc_field_t *id3 = __gg__treeplet_1f[cblc_index];
- size_t id3_o = __gg__treeplet_1o[cblc_index];
- size_t id3_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id5 = __gg__treeplet_1f[cblc_index];
- size_t id5_o = __gg__treeplet_1o[cblc_index];
- size_t id5_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_before = __gg__treeplet_1f[cblc_index];
- size_t id4_before_o = __gg__treeplet_1o[cblc_index];
- size_t id4_before_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- const cblc_field_t *id4_after = __gg__treeplet_1f[cblc_index];
- size_t id4_after_o = __gg__treeplet_1o[cblc_index];
- size_t id4_after_s = __gg__treeplet_1s[cblc_index];
- cblc_index += 1;
-
- next_comparand.identifier_3 = normalize_id(id3,
- id3_o,
- id3_s,
- id1->encoding);
- next_comparand.identifier_5 = normalize_id(id5,
- id5_o,
- id5_s,
- id1->encoding);
-
- // Identifiers 3 and 5 have to be the same length. But
- // but either, or both, can be figurative constants. If
- // they are figurative constants, they start off with a
- // length of one. We will expand figurative constants to
- // match the length of the other one:
-
- if( id3->attr & FIGCONST_MASK )
- {
- match_lengths( next_comparand.identifier_3,
- next_comparand.identifier_5);
- }
- else if( id5->attr & FIGCONST_MASK )
- {
- match_lengths( next_comparand.identifier_5,
- next_comparand.identifier_3);
- }
-
- next_comparand.alpha
- = normalized_id_1.the_characters.c_str();
- next_comparand.omega
- = next_comparand.alpha + normalized_id_1.length;
-
- normalized_operand normalized_id_4_before
- = normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
- normalized_operand normalized_id_4_after
- = normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
-
- next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
- next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
-
- the_alpha_and_omega(normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega,
- next_comparand.alpha_it,
- next_comparand.omega_it,
- normalized_id_1.the_vectorxxxx.end());
-
- next_comparand.leading = true;
- next_comparand.leading_count = 0;
- next_comparand.first = true;
- comparands.push_back(next_comparand);
- }
- }
- }
- }
-
- // We are now set up to accomplish the data flow described
- // in the language specification. We loop through the
- // the character positions in normalized_id_1:
- char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
- char_it_c rightmost = leftmost + normalized_id_1.length;
-
- while( leftmost < rightmost )
- {
- // For each leftmost position, we check each of the
- // comparands
-
- for(size_t k=0; k<comparands.size(); k++)
- {
- if( leftmost < comparands[k].alpha_it )
- {
- // This can't be a match, because leftmost is
- // to the left of the comparand's alpha.
- continue;
- }
- if( leftmost + comparands[k].identifier_3.length
- > comparands[k].omega_it )
- {
- // This can't be a match, because the rightmost
- // character of the comparand falls to the right
- // of the comparand's omega
- continue;
- }
- // A match is theoretically possible, because all
- // the characters of the comparand fall between
- // alpha and omega:
- bool possible_match = true;
- if( comparands[k].operation != bound_characters_e)
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m]
- != leftmost[m] )
- {
- possible_match = false;
- break;
- }
- }
- }
- if( possible_match )
- {
- // The characters of the comparand match the
- // characters at leftmost. See if further processing is
- // indicated:
-
- bool match = false;
- switch( comparands[k].operation )
- {
- case bound_characters_e:
- match = true;
- break;
-
- case bound_first_e:
- if( comparands[k].first )
- {
- match = true;
- comparands[k].first = false;
- }
- break;
-
- case bound_all_e:
- {
- // We have a match.
- match = true;
- break;
- }
-
- case bound_leading_e:
- {
- // We have a match at leftmost. But we need to figure out if this
- // particular match is valid for LEADING.
-
- // Hang onto your hat. This is delightfully clever.
- //
- // This position is LEADING if:
- // 1) .leading is still true
- // 2) leftmost / (length_of_comparand ) = current_count
- //
- // I get chills every time I look at that.
- if( comparands[k].leading )
- {
- // So far, so good.
- size_t count = (leftmost - comparands[k].alpha_it)
- / comparands[k].identifier_3.length;
- if( count == comparands[k].leading_count )
- {
- // This means that the match here is just the latest of a
- // string of LEADING matches that started at .alpha
- comparands[k].leading_count += 1;
- match = true;
- }
- }
- break;
- }
-
- case bound_trailing_e:
- {
- // We have a match at leftmost.
- //
- // We want to know if this is a trailing match. For that to be,
- // all of the possible matches from here to the omega have to be
- // true as well:
-
- if( (comparands[k].omega_it-leftmost)
- % comparands[k].identifier_3.length == 0 )
- {
- // The remaining number of characters is correct for a match.
- // Keep checking.
-
- // Assume a match until we learn otherwise:
- match = true;
- char_it_c local_left = leftmost;
- local_left += comparands[k].identifier_3.length;
- while( local_left < comparands[k].omega_it )
- {
- for(size_t m=0; m<comparands[k].identifier_3.length; m++)
- {
- if( comparands[k].identifier_3.the_vectorxxxx[m]
- != local_left[m] )
- {
- // We have a mismatched character, so no trailing match is
- // possible
- match = false;
- break;
- }
- }
- local_left += comparands[k].identifier_3.length;
- }
- }
- break;
- }
- }
- if( match )
- {
- // We have a match at leftmost. We need to
- // to replace the characters in normalized_id_1
- // with the characters from normalized_id_5
-
- size_t index = leftmost
- - normalized_id_1.the_vectorxxxx.begin();
- for( size_t l = 0;
- l < comparands[k].identifier_5.length;
- l++ )
- {
- char ch = comparands[k].identifier_5.
- the_vectorxxxx[l];
- normalized_id_1.the_vectorxxxx[index++] = ch;
- }
- // Adjust the leftmost pointer to point to
- // the rightmost character of the matched
- // string, keeping in mind that it will be
- // bumped again after we break out of the
- // k<pair_count loop:
- leftmost += comparands[k].identifier_3.length - 1;
- break;
- }
- }
- else
- {
- comparands[k].leading = false;
- }
- }
- leftmost += 1;
- }
-
- // Here is where we take the characters from normalized_id_1 and put them
- // back into identifier_1.
-
- charmap_t *charmap = __gg__get_charmap(id1->encoding);
- // Wastefully prefill id_1 with spaces in case the processing resulted in a
- // string shorter than the original. (There is always the possiblity that
- // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
- // this before calling __gg__converter, because both mapped_character and
- // __gg__iconverter use the same static buffer.
- unsigned char *id1_data = id1->data + id1_o;
- charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
-
- // We've been working in UTF32; we convert back to the original id1 encoding.
- size_t bytes_converted;
- const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
- id1->encoding,
- normalized_id_1.the_vectorxxxx.data(),
- normalized_id_1.length*width_of_utf32,
- &bytes_converted) ;
- // And move those characters into place in id_1:
- memcpy(id1_data,
- converted,
- std::min(bytes_converted, id1_s));
- return;
- }
-
-static std::u32string
-normalize_for_inspect_format_4(const cblc_field_t *var,
- size_t var_offset,
- size_t var_size,
- cbl_encoding_t source_encoding)
- {
- std::u32string retval;
- if(var)
- {
- const charmap_t *charmap_var = __gg__get_charmap(source_encoding);
- charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
-
- cbl_figconst_t figconst =
- static_cast<cbl_figconst_t>(var->attr & FIGCONST_MASK);
- // We have a corner case to deal with:
- if( strcmp(var->name, "NULLS") == 0 )
- {
- figconst = null_value_e;
- }
-
- if( figconst )
- {
- // Build up an var_size array of figconst characters
- cbl_char_t figchar = '\0';
- switch( figconst )
- {
- case low_value_e :
- figchar = charmap32->low_value_character();
- break;
- case zero_value_e :
- figchar = charmap32->mapped_character(ascii_0);
- break;
- case space_value_e :
- figchar = charmap32->mapped_character(ascii_space);
- break;
- case quote_value_e :
- figchar = charmap32->quote_character();
- break;
- case high_value_e :
- {
- if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
- {
- // See the comments where these constants are defined.
- if(charmap_var->stride() == 1)
- {
- if(charmap_var->is_like_ebcdic())
- {
- // This maps back to 0xFF in CP1140
- figchar = EBCDIC_HIGH_VALUE_32;
- }
- else
- {
- // This maps back to 0xFF in CP1252
- figchar = ASCII_HIGH_VALUE_32;
- }
- }
- else if(charmap_var->stride() == 2)
- {
- figchar = UTF16_HIGH_VALUE_32;
- }
- else
- {
- figchar = UTF32_HIGH_VALUE_32;
- }
- }
- else
- {
- figchar = charmap32->mapped_character(__gg__high_value_character);
- }
- break;
- }
- case null_value_e:
- break;
- default:
- figchar = '\0';
- abort();
- break;
- }
- retval.push_back(figchar);
- }
- else
- {
- // It's not a figurative constant, so convert var to UTF32.
- size_t converted_bytes;
- const char *converted = __gg__iconverter(
- var->encoding,
- DEFAULT_32_ENCODING,
- var->data + var_offset,
- var_size,
- &converted_bytes);
- void *duped = __gg__memdup(converted, converted_bytes);
- for(size_t i=0; i<converted_bytes; i+=width_of_utf32)
- {
- cbl_char_t ch = charmap32->getch(duped, i);
- retval.push_back(ch);
- }
- free(duped);
- }
- }
- return retval;
- }
-
-extern "C"
-void
-__gg__inspect_format_4( int backward,
- cblc_field_t *input, // identifier-1
- size_t input_offset,
- size_t input_size,
- const cblc_field_t *original, // id-6 / literal-4
- size_t original_offset,
- size_t original_size,
- const cblc_field_t *replacement, // id-7 / literal-5
- size_t replacement_offset,
- size_t replacement_size,
- const cblc_field_t *after, // id-4 / literal-2
- size_t after_offset,
- size_t after_size,
- const cblc_field_t *before, // id-4 / literal-2
- size_t before_offset,
- size_t before_size
- )
- {
- // We need to cope with multiple encodings; the ISO specification says only
- // that identifier-1 and -3 through -n are display or national.
-
- // We will leave the input encoded as whatever it is, and we will convert the
- // others to match.
-
- // We also need to cope with anything except identifier-1 being a figurative
- // constant.
-
- cbl_figconst_t figconst_original =
- static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
- cbl_figconst_t figconst_replacement =
- static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
- int figswitch = (figconst_original ? 2 : 0) + (figconst_replacement ? 1 : 0);
- switch( figswitch )
- {
- case 0:
- // Neither are figconst; we leave the sizes alone
- break;
- case 1:
- // Only replacement is figconst, so we make its size -1
- // This will cause CONVERTING "ABC" TO ZERO to be the same as
- // CONVERTING "ABC" TO "000"
- replacement_size = (size_t)(-1LL);
- break;
- case 2:
- // Only original is figconst. Set the size to one. (This is necessary
- // because the size of NULL is eight, since NULL does double-duty as both
- // a character (this is a MicroFocus specification) and a pointer.
- original_size = 1;
- break;
- case 3:
- // Both are figconst
- replacement_size = original_size = 1;
- break;
- }
-
- // Because before and after can be figurative constant NULL, we have to make
- // sure that in such cases the size is 1:
- if(before && before_size && before->attr & FIGCONST_MASK)
- {
- before_size = 1;
- }
- if(after && after_size && after->attr & FIGCONST_MASK)
- {
- after_size = 1;
- }
-
- bool all = (replacement_size == (size_t)(-1LL));
- if( all )
- {
- // A replacement_size of -1 means that the statement is something like
- // INSPECT XYZ CONVERTING "abcxyz" to ALL "?" That means replacement is
- // a single character. We need to convert it to the target encoding.
- const charmap_t * charmap = __gg__get_charmap(input->encoding);
- replacement_size = charmap->stride();
- }
-
- std::u32string str_input = normalize_for_inspect_format_4(input , input_offset , input_size , input->encoding);
- std::u32string str_original = normalize_for_inspect_format_4(original , original_offset , original_size , input->encoding);
- std::u32string str_replacement = normalize_for_inspect_format_4(replacement, replacement_offset, replacement_size, input->encoding);
- std::u32string str_after = normalize_for_inspect_format_4(after , after_offset , after_size , input->encoding);
- std::u32string str_before = normalize_for_inspect_format_4(before , before_offset , before_size , input->encoding);
-
- if( all )
- {
- // We now expand the single-character replacement to be the same length as
- // original.
- cbl_char_t ch = str_replacement[0];
- str_replacement.clear();
- for(size_t i=0; i<str_original.size(); i++)
- {
- str_replacement.push_back(ch);
- }
- }
-
- // Use a map to make this O(N), rather than an O(N-squared),
- // computational complexity
- std::unordered_map<cbl_char_t, cbl_char_t>map;
- typedef std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator map_it_t ;
-
- // The rule is, if the same character appears more than once in the
- // original (which is identifier-6), then the first occurrence of the
- // matching character in replacement is used. So, we create the map
- // backwards. The one closest to zero will win.
- for(size_t i=str_original.size()-1; i<str_original.size(); i--)
- {
- map[str_original[i]] = str_replacement[i];
- }
-
- size_t leftmost_i; // Leftmost index to replace at.
- size_t rightmost_i; // Rightmost+1 index to replace at.
-
- if( !backward )
- {
- // This is a forward conversion. We look for the first instance
- // of str_after from the left. And then we look for the first instance
- // of str_before after that. When there is no str_before, we move the
- // rightmost limit to the end of str_input, as if there were no BEFORE
- // phrase:
-
- if( str_after.empty() )
- {
- // There is no AFTER phrase, so we start from the left.
- leftmost_i = 0;
- }
- else
- {
- size_t nfound = str_input.find(str_after);
- if( nfound != std::u32string::npos )
- {
- // Move the left limit to one character past the found element
- leftmost_i = nfound + str_after.size();
- }
- else
- {
- // We didn't find the after phrase, so we move the left limit to the
- // end of input, which means nothing will be replaced
- leftmost_i = str_input.size();
- }
- }
-
- // At this point, leftmost_i has been set to something. Look for the
- // BEFORE phrase somewhere to the right of it:
-
- if( str_before.empty() )
- {
- // There is no BEFORE phrase, so set rightmost to the end of the input
- rightmost_i = str_input.size();
- }
- else
- {
- // Look for BEFORE to the right of leftmost_i:
- size_t nfound = str_input.find(str_before, leftmost_i);
- if( nfound != std::u32string::npos )
- {
- // We found the BEFORE phrase.
- rightmost_i = nfound;
- }
- else
- {
- // We didn't find the BEFORE phrase; IOS says to treat this situation
- // as if there were no BEFORE phrase
- rightmost_i = str_input.size();
- }
- }
- }
- else
- {
- // We are doing a BACKWARD conversion. So, we look for the AFTER phrase
- // and use that to establish the rightmost limit. And we look for the
- // BEFORE to the left of AFTER phrase and use that to establish the
- // leftmost limit
-
- if( str_after.empty() )
- {
- // There is no AFTER phrase, so we set the rightmost limit to the end
- // of the input:
- rightmost_i = str_input.size();
- }
- else
- {
- // Start from the right and look for AFTER
- size_t nfound = str_input.rfind(str_after, str_input.size());
- if( nfound != std::u32string::npos )
- {
- // We found str_after, so its location becomes rightmost
- rightmost_i = nfound;
- }
- else
- {
- // We didn't find str_after, so we move rightmost all the way to the
- // left, so that nothing will ever be found.
- rightmost_i = 0;
- }
- }
- // rightmost_i has been established, so now look for BEFORE to the left
- // of it
- if( str_before.empty() )
- {
- // There is no str_before, so the left limit is all the way to the left
- leftmost_i = 0;
- }
- else
- {
- size_t nfound = str_input.rfind(str_before, rightmost_i);
- if( nfound != std::u32string::npos )
- {
- // We found BEFORE, so we put the left limit just to the right of
- // where we found it:
- leftmost_i = nfound + str_before.size();
- }
- else
- {
- // Not finding the BEFORE phrase is the same as the BEFORE phrase
- // not having been specified:
- leftmost_i = 0;
- }
- }
- }
- // leftmost_i and rightmost_i have been established. Do the conversion of
- // characters inside those limits:
- for(size_t i=leftmost_i; i<rightmost_i; i++)
- {
- cbl_char_t ch = str_input[i];
- map_it_t cvt = map.find(ch);
- if( cvt != map.end() )
- {
- str_input[i] = cvt->second;
- }
- }
-
- // We now take the converted str_input, and put it back into id_1:
-
- size_t bytes_converted;
- const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
- input->encoding,
- str_input.data(),
- str_input.size()*width_of_utf32,
- &bytes_converted) ;
-
- // And move those characters into place in input:
- memcpy(input->data + input_offset,
- converted,
- std::min(bytes_converted, input_size));
- }
-
static void
move_string(cblc_field_t *field,
size_t offset,
return retval;
}
+static std::u32string
+normalize_for_inspect_format_4(const cblc_field_t *var,
+ size_t var_offset,
+ size_t var_size,
+ cbl_encoding_t source_encoding)
+ {
+ std::u32string retval;
+ if(var)
+ {
+ const charmap_t *charmap_var = __gg__get_charmap(source_encoding);
+ charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+
+ cbl_figconst_t figconst =
+ static_cast<cbl_figconst_t>(var->attr & FIGCONST_MASK);
+ // We have a corner case to deal with:
+ if( strcmp(var->name, "NULLS") == 0 )
+ {
+ figconst = null_value_e;
+ }
+
+ if( figconst )
+ {
+ // Build up an var_size array of figconst characters
+ cbl_char_t figchar = '\0';
+ switch( figconst )
+ {
+ case low_value_e :
+ figchar = charmap32->low_value_character();
+ break;
+ case zero_value_e :
+ figchar = charmap32->mapped_character(ascii_0);
+ break;
+ case space_value_e :
+ figchar = charmap32->mapped_character(ascii_space);
+ break;
+ case quote_value_e :
+ figchar = charmap32->quote_character();
+ break;
+ case high_value_e :
+ {
+ if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
+ {
+ // See the comments where these constants are defined.
+ if(charmap_var->stride() == 1)
+ {
+ if(charmap_var->is_like_ebcdic())
+ {
+ // This maps back to 0xFF in CP1140
+ figchar = EBCDIC_HIGH_VALUE_32;
+ }
+ else
+ {
+ // This maps back to 0xFF in CP1252
+ figchar = ASCII_HIGH_VALUE_32;
+ }
+ }
+ else if(charmap_var->stride() == 2)
+ {
+ figchar = UTF16_HIGH_VALUE_32;
+ }
+ else
+ {
+ figchar = UTF32_HIGH_VALUE_32;
+ }
+ }
+ else
+ {
+ figchar = charmap32->mapped_character(__gg__high_value_character);
+ }
+ break;
+ }
+ case null_value_e:
+ break;
+ default:
+ figchar = '\0';
+ abort();
+ break;
+ }
+ retval.push_back(figchar);
+ }
+ else
+ {
+ // It's not a figurative constant, so convert var to UTF32.
+ size_t converted_bytes;
+ const char *converted = __gg__iconverter(
+ var->encoding,
+ DEFAULT_32_ENCODING,
+ var->data + var_offset,
+ var_size,
+ &converted_bytes);
+ void *duped = __gg__memdup(converted, converted_bytes);
+ for(size_t i=0; i<converted_bytes; i+=width_of_utf32)
+ {
+ cbl_char_t ch = charmap32->getch(duped, i);
+ retval.push_back(ch);
+ }
+ free(duped);
+ }
+ }
+ return retval;
+ }
+
extern "C"
int
__gg__string(const size_t integers[])
extern "C"
int __gg__is_canceled(size_t function_pointer)
{
- int retval = 0;
- std::set<size_t>::iterator it = to_be_canceled.find(function_pointer);
- if( it == to_be_canceled.end() )
- {
- retval = 0;
- }
- else
- {
- retval = 1;
- to_be_canceled.erase(it);
- }
+ int retval = static_cast<int>(to_be_canceled.erase(function_pointer));
return retval;
}
}
static std::vector<void *>proc_signatures;
-static std::vector<void *>return_addresses;
+static std::vector<size_t>return_addresses;
static std::vector<size_t>bookmarks;
extern "C"
void
__gg__pseudo_return_push( void *proc_signature,
- void *return_address)
+ size_t index)
{
proc_signatures.push_back(proc_signature);
- return_addresses.push_back(return_address);
+ return_addresses.push_back(index);
__gg__exit_address = proc_signature;
}
extern "C"
-void *
+size_t
__gg__pseudo_return_pop()
{
- void *retval = return_addresses.back();
+ size_t retval = return_addresses.back();
return_addresses.pop_back();
proc_signatures.pop_back();
// See comment for __gg__look_at_int128
return ptr;
}
+
+extern "C"
+void
+__gg__set_data_member(cblc_field_t *field, unsigned char *data)
+ {
+ // This function is used to hide the initialization of the ->data member
+ // from the compiler. This avoids the bug that causes n-squared time in the
+ // middle end for a -O0 compiler when doing a -fpie compilation.
+ field->data = data;
+ }
VERSION-INFORMATION The value, between quotation marks or apostrophes, of the version information in the XML declaration
*/
-///////////////
-extern cblc_field_t __ggsr__xml_event;
-extern cblc_field_t __ggsr__xml_code;
-extern cblc_field_t __ggsr__xml_text;
-extern cblc_field_t __ggsr__xml_ntext;
+static cblc_field_t *xml_field_event = nullptr;
+static cblc_field_t *xml_field_text = nullptr;
+static cblc_field_t *xml_field_code = nullptr;
static void
-xml_event( const char event_name[], size_t len, char text[] ) {
- assert(strlen(event_name) < __ggsr__xml_event.allocated);
+xml_event( const char event_name[], size_t len, char text[]) {
+ assert(strlen(event_name) < xml_field_event->allocated);
- auto pend = __ggsr__xml_event.data + __ggsr__xml_event.allocated;
+ auto pend = xml_field_event->data + xml_field_event->allocated;
auto p = std::copy( event_name, event_name + strlen(event_name),
- PTRCAST(char, __ggsr__xml_event.data) );
+ PTRCAST(char, xml_field_event->data) );
std::fill(PTRCAST(unsigned char, p), pend, 0x20);
- __ggsr__xml_text.data = reinterpret_cast<unsigned char*>(text);
- __ggsr__xml_text.capacity = __ggsr__xml_text.allocated = len;
- __ggsr__xml_code.data = 0;
+ xml_field_text->data = reinterpret_cast<unsigned char*>(text);
+ xml_field_text->capacity = xml_field_text->allocated = len;
+ xml_field_code->data = 0;
cobol_callback();
}
cblc_field_t *encoding __attribute__ ((unused)),
cblc_field_t *validating __attribute__ ((unused)),
int returns_national __attribute__ ((unused)),
- void (*callback)(void) )
+ void (*callback)(void),
+ cblc_field_t *event,
+ cblc_field_t *code,
+ cblc_field_t *text)
{
+ xml_field_event = event;
+ xml_field_code = code;
+ xml_field_text = text;
+
initialize_handlers(callback);
const char *input = PTRCAST(char, input_field->data + input_offset);