int last_line;
int last_column;
+ cbl_loc_t()
+ : first_line(0)
+ , first_column(0)
+ , last_line(0)
+ , last_column(0)
+ {}
cbl_loc_t( const YYLTYPE& loc )
: first_line(loc.first_line)
, first_column(loc.first_column)
, last_column(loc.last_column)
{}
- operator YYLTYPE() const {
+ operator YYLTYPE() const { // cppcheck-suppress syntaxError
return { first_line, first_column, last_line, last_column };
}
};
MfBinaryLongLong,
MfCallGiving,
+ MfCallLiteral,
MfCdfDollar,
MfComp6,
MfCompX,
MfLevel_1_Occurs,
MfLevel78,
+ MfAnyLength,
+ MfMoveIndex,
MfMovePointer,
MfReturningNum,
MfUsageTypename,
%{
#include "cobol-system.h"
-#include "coretypes.h"
-#include "tree.h"
+#include <coretypes.h>
+#include <tree.h>
+
#undef yy_flex_debug
+
#include "../../libgcobol/ec.h"
#include "../../libgcobol/common-defs.h"
#include "util.h"
}
void cobol_warning( cbl_diag_id_t id, int yn, bool );
+void cobol_warning_suppress( cbl_dialect_t dialect );
static bool
cobol_langhook_handle_option (size_t scode,
copybook_extension_add(cobol_copyext);
return true;
+ case OPT_fexec_charset_:
+ if( ! cobol_alpha_encoding( arg ) ) {
+ cbl_errx( "no such charset %qs", arg);
+ }
+ return true;
+
+ case OPT_fexec_national_charset_:
+ if( ! cobol_national_encoding( arg ) ) {
+ cbl_errx( "no such national charset %qs", arg);
+ }
+ return true;
+
case OPT_M:
cobol_set_pp_option('M');
return true;
return true;
case OPT_fdefaultbyte:
- // cobol_default_byte is an unsigned ing
+ // cobol_default_byte is an unsigned int
wsclear(cobol_default_byte);
return true;
// gcc disallows 0 as an enumerated value, so we used 0x10 for iso.
if( cobol_dialect == 0x100 ) cobol_dialect = 0;
cobol_dialect_set(cbl_dialect_t(cobol_dialect));
+ cobol_warning_suppress(cbl_dialect_t(cobol_dialect));
return true;
case OPT_fsyntax_only:
case OPT_nomain:
return true;
- case OPT_finternal_ebcdic:
- cobol_gcobol_feature_set(feature_internal_ebcdic_e);
- return true;
-
// Warnings and errors
case OPT_Wbinary_long_long:
cobol_warning(MfCallGiving, call_giving, warning_as_error);
return true;
+ case OPT_Wcall_literal:
+ cobol_warning(MfCallLiteral, call_literal, warning_as_error);
+ return true;
+
case OPT_Wcdf_dollar:
cobol_warning(MfCdfDollar, cdf_dollar, warning_as_error);
return true;
cobol_warning(Par78CdfDefinedW, level_78_defined, warning_as_error);
return true;
+ case OPT_Wmove_index:
+ cobol_warning(MfMoveIndex, move_index, warning_as_error);
+ return true;
+
case OPT_Wmove_pointer:
cobol_warning(MfMovePointer, move_pointer, warning_as_error);
return true;
cobol_warning(MfLevel78, level_78, warning_as_error);
return true;
+ case OPT_Wany_length:
+ cobol_warning(MfAnyLength, cobol_any_length, warning_as_error);
+ return true;
+
case OPT_Wreturning_number:
cobol_warning(MfReturningNum, returning_number, warning_as_error);
return true;
-C) error "$opt $incomparable"
;;
-d | -debug | --debug) opts="$opts -fcobol-exceptions=EC-ALL"
- warn "$opt implies -fstack-check:"
;;
# define for preprocessor, note: -D* is directly passed
-D)
opts="$opts $opt"
;;
-ext)
- pending_arg="$opt "
+ pending_arg="-copyext "
;;
-ext=*) opts="$opts $(echo "$opt" | sed 's/-ext=/-copyext ./')"
;;
-fwrite-after) warn "$opt" ;;
-fmfcomment) warn "$opt" ;;
-facucomment) warn "$opt" ;;
- -fno-trunc) no_warn "$opt" ;;
+ -ftrunc | -fno-trunc | -fnotrunc) no_warn "$opt" ;;
-fsingle-quote) warn "$opt" ;;
-foptional-file) warn "$opt" ;;
-fstatic-call | -fno-static-call)
-fformat=auto) ;; # gcobol and gnucobol default
-fixed | --fixed | -fformat=fixed | -fformat=variable | -fformat=xcard)
- # note: variable + xcard are only _more similar_ to fixed than free,
- # (with changing right-column to 250/255, which isn't supported in gcobol, yet)
+ # note: variable + xcard are only _more similar_
+ # to fixed than free, (with changing right-column
+ # to 250/255, which isn't supported in gcobol
opts="$opts -ffixed-form"
;;
;;
-i | --info) warn "$opt"
;;
-
- # -I
+ -I) pending_arg=$opt
+ ;;
+ -I*)
+ opts="$opts $opt"
+ ;;
-fimplicit-init) warn "$opt"
;;
-j | -job) warn "$opt"
-K*) warn "$opt"
;;
# -l
- # -L
+ -L) pending_arg=$opt
+ ;;
--list*) warn "$opt"
;;
-m) mode="-shared"
opts="$opts $opt"
;;
- # -O0, -Ox
- -O | -O2 | -Os) warn "$opt"
- ;;
-S) mode="$opt"
;;
-save-temps=*) opt="$(echo "$opt" | sed -E 's/^.+=//')"
.Op Fl copyext Ar ext
.Op Fl ffixed-form | Fl ffree-form
.Op Fl findicator-column
-.Op Fl finternal-ebcdic
+.Op Fl fexec-charset= Ns Ar encoding
+.Op Fl fexec-national-charset= Ns Ar encoding
+.ig
+.Op Fl collseq Ar encoding Ns \/, Fl ncolseq Ar encoding
+..
.Op Fl dialect Ar dialect-name
.Op Fl include Ar filename
.Op Fl preprocess Ar preprocess-filter
.Op Fl Wno-bad-numeric
.Op Fl Wno-binary-long-long
.Op Fl Wno-call-giving
+.Op Fl Wno-call-literal
.Op Fl Wno-cdf-dollar
.Op Fl Wno-cdf-invalid-parameter
.Op Fl Wno-cdf-name-not-found
.Op Fl Wno-literal-concat
.Op Fl Wno-locale-error
.Op Fl Wno-move-corresponding
+.Op Fl Wno-move-index
.Op Fl Wno-move-pointer
.Op Fl Wno-nllanginfo-error
.Op Fl Wno-operator-space
linking, with
.Xr dlsym 3 Ns Li ,
because its value is determined at run time.
+.
+.It Fl fexec-charset= Ns Ar encoding
+Set the default execution character set for alphanumeric data items
+and literals in the absence of
+.Sy "CHARACTER CLASSIFICATION"
+in
+.Sy "CONFIGURATION SECTION" .
+.Ar encoding
+is an encoding name as defined by
+.Xr iconv 3 .
+Unless otherwise specified, the runtime encoding for both alphanumeric
+and NATIONAL is
+.Sy CP1252
+as defined by
+.Xr iconv 3 .
+.
+.It Fl fexec-national-charset= Ns Ar encoding
+Set the default execution character set for NATIONAL data items
+and literals in the absence of
+.Sy "CHARACTER CLASSIFICATION"
+in
+.Sy "CONFIGURATION SECTION" .
+.Ar encoding
+is an encoding name as defined by
+.Xr iconv 3 .
+To use an EBCDIC encoding for data items, one might use
+.D1 Fl fexec-national-charset= Ns Li CP1140
+for example.
+.
.It Fl dialect Ar dialect-name
By default,
.Nm
Warn if BINARY-LONG-LONG is used.
.It Fl Wno-call_giving
Warn if CALL ... GIVING is used.
+.It Fl Wno-call_literal
+Warn if CALL is used is used with a literal parameter by reference.
.It Fl Wno-cdf-dollar
Warn if CDF \[Do]IF is used.
.It Fl Wno-comp-6
Warn if Level 01 is used with OCCURS.
.It Fl Wno-level-78-defined
Warn if CDF defines Level 78 constant.
+.It Fl Wno-move-index
+Warn if MOVE INDEX is used.
.It Fl Wno-move-pointer
Warn if MOVE POINTER is used.
.It Fl Wno-returning-number
.
.Sh EBCDIC
The
-.Fl finternal-ebcdic
+.Fl fexec-charset=cp1140
option is useful when working with mainframe \*[lang] programs intended
for EBCDIC-encoded files. With this option, while the \*[lang] text
remains in ASCII, the character literals and field initial values
read from a file are interpreted as EBCDIC data. The file data are
not
.Em converted ;
-rather, the file is assumed to use EBCDIC representation. String
+rather, the file is assumed to use EBCDIC representation. String
literals in the \*[lang] text
.Em are
-converted, so that they can be compared meaningfully with data in the file.
+converted, so that they can be compared meaningfully with data in the
+file. Code Page 1140 is one of a number of EBCDIC code pages; it is
+often useful because it closely parallels the commonly-used Code Page
+1252 that provides many Western European characters.
.Pp
Only file data and character literals are affected. Data read from
and written to the environment, or taken from the command line, are
.Bl -tag -compact
.It Sy %EBCDIC-MODE
is set by
-.Fl finternal-ebcdic .
+.Fl fexec-charset= Ns Ar <encoding>
+for all EBCDIC encodings.
.It Sy %64-BIT-POINTER
is implied by
.Fl "dialect ibm" .
char *
level_88_helper(size_t parent_capacity,
const cbl_domain_elem_t &elem,
- size_t &returned_size,
- cbl_encoding_t encoding)
+ size_t &returned_size)
{
// We return a MALLOCed return value, which the caller must free.
char *retval = static_cast<char *>(xmalloc(parent_capacity + 64));
switch(figconst)
{
case normal_value_e :
- // This really should never happend
+ // This really should never happen
abort();
break;
case low_value_e :
memcpy(first_name, elem.name(), first_name_length);
first_name[first_name_length] = '\0';
- /* By rights, the parser should have given us this string in the target
- encoding. When I discovered that it was not, Jim Lowden was out of
- town for a week, and I didn't feel like figuring out where in the
- parser the fix should be.
-
- So, I am doing the conversion here. Eventually that will be fixed, and
- chaos will reign here. When that happens, remove the following
- conversion. */
- charmap_t *charmap = __gg__get_charmap(encoding);
- for(size_t i=0; i<strlen(first_name); i++)
- {
- first_name[i] = charmap->mapped_character(first_name[i]);
- }
- ///////////////// end of conversion
-
if( parent_capacity == 0 )
{
// Special case: parent_capacity is zero when this routine has been
// Do the first element of the domain
stream = level_88_helper( parent_capacity,
domain->first,
- stream_len,
- var->codeset.encoding);
+ stream_len);
if( output_index + stream_len > retval_capacity )
{
retval_capacity *= 2;
// Do the second element of the domain
stream = level_88_helper( parent_capacity,
domain->last,
- stream_len,
- var->codeset.encoding);
+ stream_len);
if( output_index + stream_len > retval_capacity )
{
retval_capacity *= 2;
char *
get_class_condition_string(cbl_field_t *var)
{
+ // This routine returns a malloced pointer.
+
// We know at this point that var is FldClass
// The LEVEL is not 88, so this is a CLASS SPECIAL-NAME
"ABCJ12" // This is the same as "A" "B" "C" ...
Expressly presented numbers are the ordinal positions in the run-time
- character set. So, an ASCII "A" would be given as 66, which is one
- greater than 65, which is the ASCII codepoint for "A". An EBCDIC "A"
- would be presented as 194, which is one greater than 193, which is the
- decimal representation of an EBCDIC "A", whose hex code is 0xC2.
-
- We need to account for EBCDIC as well as ASCII. In EBCDIC,
- "A" THROUGH "Z" doesn't mean what it looks like it means, because EBCIDC
- encoding has gaps between I and J, and between R and S. That isn't true
- in ASCII. We don't want to deal with these issues at compile time, so we
- are encoding numeric ordinals with their negated values, while other
- characters are given as the numeric forms of their ASCII encoding.
- Conversion to EBCDIC occurs at runtime.
-
- In support of this strategy, character strings like "ABCD" are broken up
- into "A" "B" "C" "D" and converted to their hexadecimal representations.
+ character set. We encode those values with a leading ascii_hyphen to
+ distinguish them from characters.
+
+ Characters are converted to UTF32 values, and then encoded as big-endian
+ hexadecimal characters.
+
+ A range of values is encoded as a pair of hexadecimal values with an
+ ascii_slash between them. The second value ends with a space
+
+ A list of characters is encoded simply as a stream of hexadecimal values
+ separated by spaces.
*/
char ach[8192];
while( domain->first.is_numeric || domain->first.name() )
{
- // *What* were they smoking back then?
-
- uint8_t value1;
- uint8_t value2;
-
size_t first_name_length = domain->first.size()
? domain->first.size()
: strlen(domain->first.name());
+ cbl_encoding_t from = var->codeset.default_encodings.source->type;
+ cbl_encoding_t to = DEFAULT_32_ENCODING;
+ size_t nbytes;
+ const char *converted;
+
if( domain->first.is_numeric )
{
if( strlen(ach) > sizeof(ach) - 1000 )
{
- cbl_internal_error("Nice try, but you cannot fire me.");
+ cbl_internal_error("That string should not be that long.");
}
- // We are working with unquoted strings that contain the values 1 through
- // 256:
- value1 = (uint8_t)atoi(domain->first.name());
- value2 = (uint8_t)atoi(domain->last.name());
+ // We are working with unquoted strings that contain the values
+ uint32_t value1 = atoll(domain->first.name());
+ uint32_t value2 = atoll(domain->last.name());
if( value2 < value1 )
{
std::swap(value1, value2);
}
if( value1 != value2 )
{
- p += sprintf(p, "-%2.2X/-%2.2X ", value1-1, value2-1);
+ p += sprintf(p, "-%X/-%X ", value1, value2);
}
else
{
- p += sprintf(p, "-%2.2X ", value1-1);
+ p += sprintf(p, "-%X ", value1);
}
}
else if( first_name_length == 1 )
{
// Since the first.name is a single character, we can do this as
// a single-character pair.
- uint8_t ch1;
- uint8_t ch2;
-
- ch2 = domain->last.name()[0];
- ch1 = domain->first.name()[0];
-
- if( ch1 < ch2 )
- {
- value1 = ch1;
- value2 = ch2;
- }
- else
+ converted = __gg__iconverter(from,
+ to,
+ domain->first.name(),
+ 1,
+ &nbytes);
+ cbl_char_t ch1 = *reinterpret_cast<const cbl_char_t *>(converted);
+ converted = __gg__iconverter(from,
+ to,
+ domain->last.name(),
+ 1,
+ &nbytes);
+ cbl_char_t ch2 = *reinterpret_cast<const cbl_char_t *>(converted);
+
+ if( ch1 > ch2 )
{
- value2 = ch1;
- value1 = ch2;
+ std::swap(ch1, ch2);
}
- if( value1 != value2 )
+ if( ch1 != ch2 )
{
- p += sprintf(p, "%2.2X/%2.2X ", value1, value2);
+ p += sprintf(p, "%X/%X ", ch1, ch2);
}
else
{
- p += sprintf(p, "%2.2X ", value1);
+ p += sprintf(p, "%X ", ch1);
}
}
else
// : strlen(domain->first.name());
for(size_t i=0; i<first_name_length; i++)
{
- p += sprintf(p, "%2.2X ", (unsigned char)domain->first.name()[i]);
+ converted = __gg__iconverter(from,
+ to,
+ domain->first.name()+i,
+ 1,
+ &nbytes);
+ cbl_char_t ch1 = *reinterpret_cast<const cbl_char_t *>(converted);
+ p += sprintf(p, "%X ", ch1);
}
}
domain += 1;
NULL); // And, hence, no types
// Fetch the FUNCTION_DECL for that FUNCTION_TYPE
- char *tname = static_cast<char *>(xmalloc(name.field->data.capacity+1));
- memcpy(tname, name.field->data.original(), name.field->data.capacity);
- tname[name.field->data.capacity] = '\0';
+ char *tname = static_cast<char *>(xmalloc(name.field->data.capacity()+1));
+ memcpy(tname, name.field->data.original(), name.field->data.capacity());
+ tname[name.field->data.capacity()] = '\0';
tree function_decl = gg_build_fn_decl(tname,
fndecl_type);
free(tname);
bool explicitly=false,
bool just_once=false)
{
- // fprintf(stderr, "initialize_variable_internal for %s\n", refer.field->name);
- // gg_printf("initialize_variable_internal for %s\n",
- // gg_string_literal(refer.field->name),
- // NULL_TREE);
cbl_field_t *parsed_var = refer.field;
if( !parsed_var )
{
}
while(0);
SHOW_PARSE_REF(" ", refer);
- if( parsed_var->data.initial )
+ if( parsed_var->data.original() )
{
SHOW_PARSE_TEXT(" >>")
if( parsed_var->level == 88)
case FldNumericEdited:
case FldAlphaEdited:
case FldLiteralA:
- SHOW_PARSE_TEXT(parsed_var->data.initial);
+ SHOW_PARSE_TEXT(parsed_var->data.original());
break;
default:
{
is_redefined = true;
}
- if( parsed_var->data.initial )
+ if( parsed_var->data.original() )
{
bool a_parent_initialized = false;
const cbl_field_t *parent = parent_of(parsed_var);
flag_bits |= just_once ? JUST_ONCE_BIT : 0 ;
suppress_dest_depends = false; // Set this to false so that refer_is_clean is valid
- //fprintf(stderr, "refer_is_clean %2.2d %s %d 0x%lx\n", refer.field->level, refer.field->name, refer_is_clean(refer), refer.field->attr);
if( !refer_is_clean(refer) )
{
|| parsed_var->type == FldLiteralA )
{
gg_assign(spaces, integer_one_node);
- tree counter = gg_define_int(parsed_var->data.capacity);
+ tree counter = gg_define_int(parsed_var->data.capacity());
WHILE(counter, gt_op, integer_zero_node)
{
gg_decrement(counter);
suppress_dest_depends = false;
}
-//static void
-//initialize_variable_internal( cbl_field_t *field,
-// bool explicitly=false,
-// bool just_once=false)
-// {
-// cbl_refer_t wrapper(field);
-// initialize_variable_internal( wrapper,
-// explicitly,
-// just_once);
-// }
-
void
parser_initialize(const cbl_refer_t& refer, bool like_parser_symbol_add)
{
}
else
{
- gcc_assert(refer.field->data.initial);
+ gcc_assert(refer.field->data.original());
static const bool explicitly = true;
initialize_variable_internal(refer, explicitly);
}
{
// The destination is something with rdigits; the source is FldFloat
tree ftype;
- switch( source->data.capacity )
+ switch( source->data.capacity() )
{
case 4:
ftype = FLOAT;
case FldPointer:
case FldFloat:
case FldLiteralN:
- retval = field->data.capacity;
+ retval = field->data.capacity();
break;
case FldNumericDisplay:
}
else
{
- retval = field->data.capacity;
+ retval = field->data.capacity();
}
break;
}
// gg_string_literal(left_side_ref.field->name),
// gg_string_literal(right_side_ref.field->name),
// member(left_side_ref.field, "data"),
- // gg_string_literal(right_side_ref.field->data.initial),
+ // gg_string_literal(right_side_ref.field->data.original()),
// NULL_TREE);
CHECK_FIELD(left_side_ref.field);
case FldGroup:
case FldAlphanumeric:
- case FldLiteralA:
{
- // Comparing a FldLiteralN to an alphanumeric
-
- // This next conversion may be overkill. But just in case
- // the encodings of the two variables are different, we are
- // going to convert left-side text to the right-side encoding
- cbl_encoding_t enc_left = lefty->field->codeset.encoding;
- cbl_encoding_t enc_right = righty->field->codeset.encoding;
+ // Comparing a FldLiteralN to an alphanumeric. The alphanumeric
+ // is encoded in its codeset.encoding, but the FldLiteralN is,
+ // in accordance with the rules in cbl_field_t::internalize,
+ // encoded in the source-code encoding. The routine we are about
+ // to call assumes that the literal string is encoded the same
+ // as the alphanumeric, so we have to make it match.
size_t outlength;
- size_t inlength = strlen(lefty->field->data.initial);
+ cbl_encoding_t enc_right = righty->field->codeset.encoding;
char *converted = __gg__iconverter(
- enc_left,
+ DEFAULT_SOURCE_ENCODING,
enc_right,
- lefty->field->data.initial,
- inlength,
+ lefty->field->data.original(),
+ strlen(lefty->field->data.original()),
&outlength );
gg_assign( return_int, gg_call_expr(
INT,
"__gg__literaln_alpha_compare",
- build_string_literal(strlen(lefty->field->data.initial)+1,
- converted),
+ gg_string_literal(converted),
gg_get_address_of(righty->field->var_decl_node),
refer_offset(*righty),
refer_size_source( *righty),
break;
}
+ case FldLiteralA:
+ {
+ // Comparing a FldLiteralN to an FldLiteralA.
+ // lefty->field->data.original() is the numeric string in ASCII.
+ // righty->field->data.original() is original alphanumeric
+ // string in ASCII.
+ int icmp = strcmp(lefty->field->data.original(),
+ righty->field->data.original());
+ gg_assign(return_int, build_int_cst_type(INT, icmp));
+ compared = true;
+ break;
+ }
+
+
default:
break;
}
int rightflags = (right_side_ref.all ? REFER_T_MOVE_ALL : 0)
+ (right_side_ref.addr_of ? REFER_T_ADDRESS_OF : 0)
+ (right_side_ref.refmod.from ? REFER_T_REFMOD : 0);
+
gg_assign( return_int, gg_call_expr(
INT,
"__gg__compare",
}
}
-static void
-move_tree( cbl_field_t *dest,
- tree offset,
- tree psz_source, // psz_source is a null-terminated string
- tree length_bump=integer_zero_node)
- {
- // This routine assumes that the psz_source is in the same codeset as the
- // dest.
-
- Analyze();
- SHOW_PARSE
- {
- SHOW_PARSE_HEADER
- SHOW_PARSE_FIELD(" ", dest);
- SHOW_PARSE_END
- }
-
- CHECK_FIELD(dest);
-
- bool moved = true;
-
- tree source_length = gg_define_size_t();
- gg_assign(source_length, gg_strlen(psz_source));
- gg_assign(source_length, gg_add(source_length, gg_cast(SIZE_T, length_bump)));
-
- tree min_length = gg_define_size_t();
-
- tree location = gg_define_uchar_star();
- tree length = gg_define_size_t();
-
- gg_assign(location,
- gg_add(member(dest->var_decl_node, "data"),
- offset));
- gg_assign(length,
- member(dest->var_decl_node, "capacity"));
-
- IF(source_length, lt_op, length)
- {
- gg_assign(min_length, source_length);
- }
- ELSE
- {
- gg_assign(min_length, length);
- }
- ENDIF
-
- tree value;
- tree rdigits;
-
- switch( dest->type )
- {
- case FldGroup:
- case FldAlphanumeric:
- {
- // Space out the alphanumeric destination:
- charmap_t *charmap = __gg__get_charmap(dest->codeset.encoding);
-
- gg_memset( location,
- build_int_cst_type(INT,
- charmap->mapped_character(ascii_space)),
- length );
- // Copy the alphanumeric result over.
- gg_memcpy( location,
- psz_source,
- min_length );
- break;
- }
-
- case FldNumericDisplay:
- case FldNumericEdited:
- case FldNumericBinary:
- case FldNumericBin5:
- case FldPacked:
- case FldIndex:
- {
- value = gg_define_int128();
- rdigits = gg_define_int();
-
- gg_assign(value,
- gg_call_expr( INT128,
- "__gg__dirty_to_binary",
- psz_source,
- build_int_cst_type(INT, dest->codeset.encoding),
- source_length,
- gg_get_address_of(rdigits),
- NULL_TREE));
-
- gg_call(VOID,
- "__gg__int128_to_qualified_field",
- gg_get_address_of(dest->var_decl_node),
- offset,
- build_int_cst_type(SIZE_T, dest->data.capacity),
- value,
- rdigits,
- build_int_cst_type(INT, truncation_e),
- null_pointer_node,
- NULL_TREE);
- }
- break;
-
- case FldAlphaEdited:
- {
- gg_call(VOID,
- "__gg__string_to_alpha_edited",
- location,
- build_int_cst_type(INT, DEFAULT_SOURCE_ENCODING),
- psz_source,
- min_length,
- member(dest->var_decl_node, "picture"),
- NULL_TREE);
- break;
- }
-
- default:
- moved = false;
- break;
- }
-
- TRACE1
- {
- TRACE1_HEADER
- gg_fprintf(trace_handle, 1, "source: \"%s\"", psz_source);
- TRACE1_END
- TRACE1_INDENT
- TRACE1_FIELD( "dest : ", dest, "")
- TRACE1_END
- }
-
- if( !moved )
- {
- dbgmsg("%10s in %s:%d", __func__, __FILE__, __LINE__ );
- cbl_internal_error( "I don%'t know how to MOVE an alphabetical string to %s(%s)",
- cbl_field_type_str(dest->type),
- dest->name
- );
- return;
- }
- }
-
-static void
-move_tree_to_field(cbl_field_t *field, tree psz)
- {
- // psz has to be in the same encoding as field
- move_tree(field, integer_zero_node, psz);
- }
-
-static tree
-get_string_from(cbl_field_t *field)
- {
- // This returns a malloced copy of either a literal string or a
- // an alphanumeric field. The idea is that eventually free() will be
- // called in the runtime space:
-
- tree psz = gg_define_char_star();
-
- if( field )
- {
- switch( field->type )
- {
- case FldLiteralA:
- {
- gg_assign(psz,
- gg_cast(CHAR_P,
- gg_malloc(build_int_cst_type(SIZE_T,
- field->data.capacity+1))));
- const char *litstring = get_literal_string(field);
- gg_memcpy(psz,
- gg_string_literal(litstring),
- build_int_cst_type(SIZE_T, field->data.capacity+1));
- break;
- }
-
- case FldGroup:
- case FldAlphanumeric:
- // make a copy of .data:
- gg_assign(psz,
- gg_cast(CHAR_P,
- gg_malloc(build_int_cst_type(SIZE_T,
- field->data.capacity+1))));
- gg_memcpy( psz,
- member(field, "data"),
- member(field, "capacity"));
- // null-terminate it:
- gg_assign( gg_array_value(psz, member(field, "capacity")),
- char_nodes[0]);
- break;
-
- case FldForward:
- {
- // At the present time, we are assuming this happens when somebody
- // specifies an unquoted file name in an ASSIGN statement:
- // SELECT file3 ASSIGN DISK.
- //
- // In that case, we just return DISK, which is field->name:
- psz = gg_strdup(gg_string_literal(field->name));
- break;
- }
-
- default:
- cbl_internal_error(
- "%s: %<field->type%> %s must be literal or alphanumeric",
- __func__, cbl_field_type_str(field->type));
- break;
- }
- }
- else
- {
- gg_assign(psz, gg_cast(CHAR_P, null_pointer_node));
- }
- return psz;
- }
-
static char *
combined_name(const cbl_label_t *label)
{
gg_call(VOID,
"__gg__variables_to_init",
gg_get_address_of(array),
- wsclear() ? build_string_literal(1, (const char *)wsclear())
+ wsclear() ? build_string_literal(
+ 1,
+ reinterpret_cast<const char *>(wsclear()))
: null_pointer_node,
NULL_TREE);
}
uint32_t digits;
int32_t rdigits;
uint64_t attr;
- FIXED_WIDE_INT(128) value = dirty_to_binary(field->data.original(),
+ //// DUBNERHACK. Necessary to prevent UAT lockup:
+ const char *source_text = field->data.original()
+ ? field->data.original()
+ : field->data.initial;
+ FIXED_WIDE_INT(128) value = dirty_to_binary(source_text,
capacity,
digits,
rdigits,
attr);
// This is a rare occurrence of a parser_xxx call changing the entry
// in the symbol table.
- field->data.capacity = capacity;
+ field->data.capacity( capacity );
field->data.digits = digits;
field->data.rdigits = rdigits;
field->attr |= attr;
// The value is 1, 2, 4, 8 or 16 bytes, so an ordinary constructor can be
// used.
- var_type = tree_type_from_size( field->data.capacity,
+ var_type = tree_type_from_size( field->data.capacity(),
field->attr & signable_e);
tree new_var_decl = gg_define_variable( var_type,
base_name,
CHECK_FIELD(target);
- tree pointer = gg_define_char_star();
- gg_assign(pointer, gg_call_expr(CHAR_P,
- "__gg__get_date_yymmdd",
- gg_get_address_of(target->var_decl_node),
- NULL_TREE));
- move_tree_to_field( target,
- pointer);
-
- gg_free(pointer);
-
+ gg_call(VOID,
+ "__gg__get_date_yymmdd",
+ gg_get_address_of(target->var_decl_node),
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
SHOW_PARSE_HEADER
SHOW_PARSE_END
}
-
- CHECK_FIELD(target);
-
- tree pointer = gg_define_char_star();
- gg_assign(pointer, gg_call_expr(CHAR_P,
- "__gg__get_date_yyyymmdd",
- gg_get_address_of(target->var_decl_node),
- NULL_TREE));
- move_tree_to_field( target,
- pointer);
-
- gg_free(pointer);
-
+ gg_call(VOID,
+ "__gg__get_date_yyyymmdd",
+ gg_get_address_of(target->var_decl_node),
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
CHECK_FIELD(target);
- tree pointer = gg_define_char_star();
- gg_assign(pointer, gg_call_expr(CHAR_P,
- "__gg__get_date_yyddd",
- gg_get_address_of(target->var_decl_node),
- NULL_TREE));
- move_tree_to_field( target,
- pointer);
-
- gg_free(pointer);
-
+ gg_call(VOID,
+ "__gg__get_date_yyddd",
+ gg_get_address_of(target->var_decl_node),
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
CHECK_FIELD(target);
- tree pointer = gg_define_char_star();
- gg_assign(pointer, gg_call_expr(CHAR_P,
- "__gg__get_yyyyddd",
- gg_get_address_of(target->var_decl_node),
- NULL_TREE));
- move_tree_to_field( target,
- pointer);
-
- gg_free(pointer);
-
+ gg_call(VOID,
+ "__gg__get_yyyyddd",
+ gg_get_address_of(target->var_decl_node),
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
CHECK_FIELD(target);
- tree pointer = gg_define_char_star();
- gg_assign(pointer, gg_call_expr(CHAR_P,
- "__gg__get_date_dow",
- gg_get_address_of(target->var_decl_node),
- NULL_TREE));
- move_tree_to_field( target,
- pointer);
-
- gg_free(pointer);
-
+ gg_call(VOID,
+ "__gg__get_date_dow",
+ gg_get_address_of(target->var_decl_node),
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
CHECK_FIELD(target);
- tree pointer = gg_define_char_star();
- gg_assign(pointer, gg_call_expr(CHAR_P,
- "__gg__get_date_hhmmssff",
- gg_get_address_of(target->var_decl_node),
- NULL_TREE));
- move_tree_to_field( target,
- pointer);
-
- gg_free(pointer);
-
+ gg_call(VOID,
+ "__gg__get_date_hhmmssff",
+ gg_get_address_of(target->var_decl_node),
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
case custom_encoding_e:
{
-#pragma message "Verify program-id is disambiguated"
- size_t alphabet_index = symbol_unique_index(symbol_elem_of(&alphabet));
+ uint64_t alphabet_index = symbol_unique_index(symbol_elem_of(&alphabet));
unsigned char ach[256];
SHOW_PARSE_END
}
- size_t alphabet_index = symbol_index(symbol_elem_of(&alphabet));
+ uint64_t alphabet_index = symbol_unique_index(symbol_elem_of(&alphabet));
switch(alphabet.encoding)
{
ENDIF
}
}
- else if( refer.field->type == FldLiteralA )
- {
- gg_call(VOID,
- "__gg__display_string",
- file_descriptor,
- build_int_cst_type(INT, refer.field->codeset.encoding),
- build_string_literal(refer.field->data.capacity,
- refer.field->data.initial),
- build_int_cst_type(SIZE_T, refer.field->data.capacity),
- advance ? integer_one_node : integer_zero_node,
- NULL_TREE );
- }
else if( refer.field->type == FldLiteralN )
{
// The parser found the string of digits from the source code and converted
integer_one_node);
}
}
+ else if( refer.field->type == FldFloat
+ && refer.field->attr & constant_e
+ && !(refer.field->attr & intermediate_e) )
+ {
+ // We are going to output what we think the user typed in the first place
+ char * const to_print = xstrdup(refer.field->data.original());
+ char *p = to_print;
+ if( *p == ascii_plus )
+ {
+ p += 1;
+ }
+ size_t len = strlen(p);
+ if(len > 2 && p[len-2] == ascii_E && p[len-1] == ascii_0 )
+ {
+ len -= 2;
+ }
+ gg_write( file_descriptor,
+ build_string_literal(len, p),
+ build_int_cst_type(SIZE_T, len));
+ free(to_print);
+
+ if( advance )
+ {
+ gg_write( file_descriptor,
+ gg_string_literal("\n"),
+ integer_one_node);
+ }
+ }
else
{
if( refer_is_clean(refer) )
{
// Get the literal N value from the integer var_decl
tree retval = NULL_TREE;
- tree var_type = tree_type_from_size(var->data.capacity,
+ tree var_type = tree_type_from_size(var->data.capacity(),
var->attr & signable_e);
retval = gg_cast(var_type, var->data_decl_node);
return retval;
case FldAlphaEdited:
case FldNumericEdited:
retval = CHAR_P;
- nbytes = field->data.capacity;
+ nbytes = field->data.capacity();
break;
case FldNumericDisplay:
case FldNumericBin5:
case FldIndex:
case FldPointer:
- if( field->data.capacity > 8 )
+ if( field->data.capacity() > 8 )
{
retval = UINT128;
nbytes = 16;
break;
case FldFloat:
- if( field->data.capacity == 8 )
+ if( field->data.capacity() == 8 )
{
retval = DOUBLE;
nbytes = 8;
}
- else if( field->data.capacity == 4 )
+ else if( field->data.capacity() == 4 )
{
retval = FLOAT;
nbytes = 4;
gg_memcpy(gg_get_address_of(retval),
member(returner, "data"),
build_int_cst_type( SIZE_T,
- std::min(nbytes, (size_t)returner->data.capacity)));
+ std::min(nbytes, (size_t)returner->data.capacity())));
}
else
{
// error of returning a pointer to data on the stack.
tree array_type = build_array_type_nelts(UCHAR,
- returner->data.capacity);
+ returner->data.capacity());
tree array = gg_define_variable(array_type, vs_static);
gg_memcpy(gg_get_address_of(array),
member(returner->var_decl_node, "data"),
{
gg_memset(member(this_one->var_decl_node, "data"),
integer_zero_node,
- build_int_cst_type(SIZE_T, this_one->data.capacity));
+ build_int_cst_type(SIZE_T, this_one->data.capacity()));
}
}
}
current_function->our_name,
instance_counter++);
- cbl_field_t for_entry = {};
- for_entry.type = FldAlphanumeric;
- for_entry.data.capacity = strlen(ach);
- for_entry.data.initial = ach;
- for_entry.codeset.encoding = iconv_CP1252_e;
+ cbl_field_data_t data( 0, strlen(ach), 0,0, ach );
+ cbl_field_t for_entry(FldAlphanumeric, 0, data, 0);
+ for_entry.codeset.set(iconv_CP1252_e);
// build an island for the callback:
tree island_goto;
"__gg__decimal_point_is_comma",
NULL_TREE);
}
+
+ // This is where we tell the library about this program's initialization
+ // values:
+ cbl_field_t *init_working = current_options().initial_working();
+ cbl_field_t *init_local = current_options().initial_local();
+ gg_call(VOID,
+ "__gg__initialization_values",
+ build_int_cst_type(UINT, wsclear() ? *wsclear()
+ : static_cast<uint32_t>(NOT_A_CHARACTER)),
+ init_working ? gg_get_address_of(init_working->var_decl_node)
+ : null_pointer_node,
+ init_local ? gg_get_address_of(init_local->var_decl_node)
+ : null_pointer_node,
+ NULL_TREE);
}
static
gg_get_address_of(base),
build_int_cst_type(SIZE_T, nbytes));
- tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity);
+ tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity());
tree data_decl_node = gg_define_variable( array_type,
NULL,
vs_static);
}
gcc_assert(tgt->type == FldConditional);
- gcc_assert(domain->data.initial);
- gcc_assert(strlen(domain->data.initial));
switch(op)
{
gg_assign(tgt->var_decl_node, gg_build_relational_expression(
gg_call_expr(INT,
"__gg__setop_compare",
- member(candidate, "data"),
- member(candidate, "capacity"),
+ gg_get_address_of(candidate->var_decl_node),
member(domain, "initial"),
- build_int_cst_type(INT,
- domain->codeset.encoding),
NULL_TREE),
ne_op,
integer_zero_node));
{
// We are dealing with an ordinary string.
- // When Jim gets around to converting the domain to the target encoding,
- // this code will have to be removed
-#if 1
- char *fname = xstrdup(src->first.name());
- charmap_t *charmap = __gg__get_charmap(tgt->codeset.encoding);
- for(size_t i=0; i<strlen(fname); i++)
- {
- fname[i] = charmap->mapped_character(fname[i]);
- }
- move_tree_to_field( parent,
- build_string_literal(strlen(fname)+1, fname));
- free(fname);
-#else
- move_tree_to_field( parent,
- build_string_literal(src->first.size()+1,
- src->first.name()));
-#endif
+ size_t converted_bytes;
+ const char *converted =
+ __gg__iconverter(parent->codeset.default_encodings.source->type,
+ parent->codeset.encoding,
+ src->first.name(),
+ strlen(src->first.name())+1,
+ &converted_bytes);
+ gg_call(VOID,
+ "__gg__refer_from_string",
+ gg_get_address_of(parent->var_decl_node),
+ size_t_zero_node,
+ build_int_cst_type(SIZE_T, parent->data.capacity()),
+ build_string_literal(converted_bytes, converted),
+ NULL_TREE);
}
else
{
__func__);
}
-#pragma message "Verify program-id is disambiguated"
- size_t symbol_table_index = symbol_unique_index(symbol_elem_of(file));
+ // This code is a hack needed until the parser sets the varies.min/max
+ // properly when they are not equal:
+ if( varies.min != varies.max
+ && current_encoding(display_encoding_e) == iconv_UTF16LE_e
+ && varies.max == symbol_file_record(file)->data.capacity() )
+ {
+ fprintf(stderr,
+ "There is a hack in genapi.cc to take into account a parser error,\n"
+ "namely the fact that when there is a RECORD VARYING clause, the\n"
+ "min/max values reflect the values in the source code, while when\n"
+ "there is no VARYING clause the min/max values are the same as the\n"
+ "default_record's data.capacity(). If you are seeing this message,\n"
+ "it would appear the parser has been updated to supply the stride-\n"
+ "corrected min/max, and the hack should be removed.\n");
+ gcc_assert(false);
+ }
+ if( varies.max < symbol_file_record(file)->data.capacity())
+ {
+ const charmap_t *charmap =
+ __gg__get_charmap(current_encoding(display_encoding_e));
+ varies.min *= charmap->stride();
+ varies.max *= charmap->stride();
+ }
+
+ uint64_t symbol_table_index = symbol_unique_index(symbol_elem_of(file));
gg_call(VOID,
"__gg__file_init",
gg_get_address_of(new_var_decl),
gg_string_literal(file->name),
- build_int_cst_type(SIZE_T, symbol_table_index),
+ build_int_cst_type(ULONGLONG, symbol_table_index),
array_of_keys,
key_numbers,
unique_flags,
build_int_cst_type(INT, (int)file->optional),
build_int_cst_type(SIZE_T, varies.min),
build_int_cst_type(SIZE_T, varies.max),
-/* Right now, file->codeset.encoding is not being set properly. Remove this
- comment and fix the following code when that's repaired. */
+/* Right now, file->codeset.encoding is not being set properly. For example,
+ when the exec-charset is EBCDIC, file->codeset is coming through as CP1252.
+ However, when exec-charset is UTF32LE, file->codeset is arriving here as
+ UTF32LE. Go figure.
+
+ Remove this comment and fix the following code when that's repaired. */
// build_int_cst_type(INT, (int)file->codeset.encoding),
build_int_cst_type(INT, current_encoding(display_encoding_e)),
build_int_cst_type(INT, (int)file->codeset.alphabet),
}
}
-static
-tree get_the_filename(bool "ed_name, const cbl_file_t *file)
- {
- // The cbl_file_t has a cbl_field_t *filename. This can be a FldAlphanumeric.
- // The runtime has a (char *)filename, so we need to
- // do a runtime conversion.
-
- tree psz; // This is going to be either the name of the file, or the
- // possible run-time environment variable that will contain
- // the name of the file.
-
- cbl_field_t *field_of_name = symbol_field_forward(file->filename);
- quoted_name = false;
- if( field_of_name->type == FldForward )
- {
- // The target of ASSIGN TO was unquoted, but didn't resolve to a
- // cbl_field_t. This means that the name of the field is an
- // environment variable that will hold the file name
- psz = gg_define_char_star();
- gg_assign(psz, gg_strdup(gg_string_literal(field_of_name->name)));
- }
- else
- {
- // The name is coming from a presumably FldAlphaNumeric variable
- psz = get_string_from(field_of_name);
- gg_call( CHAR_P,
- "__gg__convert_encoding",
- psz,
- build_int_cst_type(INT,
- field_of_name->codeset.encoding),
- build_int_cst_type(INT,
- DEFAULT_SOURCE_ENCODING),
- NULL_TREE);
- quoted_name = true;
- }
- return psz;
- }
-
void
parser_file_open( struct cbl_file_t *file, int mode_char )
{
TRACE1_END
}
- bool quoted_name;
- tree pszFilename = get_the_filename(quoted_name, file);
+ tree pszFilename = gg_define_char_star();
+ cbl_field_t *field_of_name = symbol_field_forward(file->filename);
+ if( field_of_name->type == FldForward )
+ {
+ // The target of ASSIGN TO was unquoted, but didn't resolve to a
+ // cbl_field_t. This means that the name of the field is an
+ // environment variable that will hold the file name
+ gg_assign(pszFilename, gg_strdup(gg_string_literal(field_of_name->name)));
+ }
+ else
+ {
+ gg_assign(pszFilename, gg_cast(CHAR_P, null_pointer_node));
+ }
sv_is_i_o = true;
store_location_stuff("OPEN");
gg_call(VOID,
"__gg__file_open",
gg_get_address_of(file->var_decl_node),
+ field_of_name->var_decl_node
+ ? gg_get_address_of(field_of_name->var_decl_node)
+ : null_pointer_node,
pszFilename,
build_int_cst_type(INT, mode_char),
- quoted_name ? integer_one_node : integer_zero_node,
NULL_TREE);
set_user_status(file);
}
tree there_was_an_error = gg_define_int(0);
for(size_t i=0; i<filenames.size(); i++)
{
- bool quoted_name;
- tree pszFilename = get_the_filename(quoted_name, filenames[i]);
+ tree pszFilename = gg_define_char_star();
+ cbl_field_t *field_of_name = symbol_field_forward(filenames[i]->filename);
+ if( field_of_name->type == FldForward )
+ {
+ // The target of ASSIGN TO was unquoted, but didn't resolve to a
+ // cbl_field_t. This means that the name of the field is an
+ // environment variable that will hold the file name
+ gg_assign(pszFilename,
+ gg_strdup(gg_string_literal(field_of_name->name)));
+ }
+ else
+ {
+ gg_assign(pszFilename, gg_cast(CHAR_P, null_pointer_node));
+ }
gg_assign(there_was_an_error,
gg_bitwise_or(there_was_an_error,
gg_call_expr(
INT,
"__gg__file_remove",
gg_get_address_of(filenames[i]->var_decl_node),
+ field_of_name->var_decl_node
+ ? gg_get_address_of(field_of_name->var_decl_node)
+ : null_pointer_node,
pszFilename,
- quoted_name ? integer_one_node : integer_zero_node,
NULL_TREE)));
set_user_status(filenames[i]);
}
{
size_t nfield = file->keys[key_number].fields[ifield];
cbl_field_t *field = cbl_field_of(symbol_at(nfield));
- combined_length += field->data.capacity;
+ combined_length += field->data.capacity();
}
gg_assign(length, build_int_cst_type(SIZE_T, combined_length));
}
}
void
-parser_module_name( cbl_field_t *tgt, module_type_t type )
+parser_intrinsic_find_string(cbl_field_t *tgt,
+ const cbl_refer_t& haystack,
+ const cbl_refer_t& needle,
+ const cbl_refer_t *after,
+ bool last,
+ bool anycase)
{
SHOW_PARSE
{
SHOW_PARSE_END
}
gg_call(VOID,
- "__gg__module_name",
- gg_get_address_of(tgt->var_decl_node),
- build_int_cst_type(INT, type),
- NULL_TREE);
+ "__gg__find_string",
+ gg_get_address_of( tgt->var_decl_node),
+ gg_get_address_of( haystack.field->var_decl_node),
+ refer_offset( haystack),
+ refer_size_source( haystack),
+ gg_get_address_of( needle.field->var_decl_node),
+ refer_offset( needle),
+ refer_size_source( needle),
+ after ? gg_get_address_of( after->field->var_decl_node)
+ : null_pointer_node,
+ after ? refer_offset(*after) : size_t_zero_node,
+ after ? refer_size_source(*after) : size_t_zero_node,
+ last ?integer_one_node:integer_zero_node,
+ anycase?integer_one_node:integer_zero_node,
+ NULL_TREE
+ );
}
void
-parser_intrinsic_numval_c( cbl_field_t *f,
- cbl_refer_t& input,
- bool locale,
- cbl_refer_t& currency,
- bool anycase,
- bool test_numval_c ) // true for TEST-NUMVAL-C
+parser_intrinsic_convert(cbl_field_t *tgt,
+ const cbl_refer_t& input,
+ convert_type_t src_fmt,
+ unsigned int dst_fmt )
{
- Analyze();
SHOW_PARSE
{
SHOW_PARSE_HEADER
SHOW_PARSE_END
}
- TRACE1
- {
- TRACE1_HEADER
- TRACE1_END
- }
- if( locale || anycase )
- {
+ gg_call(VOID,
+ "__gg__convert",
+ gg_get_address_of(tgt->var_decl_node),
+ gg_get_address_of(input.field->var_decl_node),
+ refer_offset(input),
+ refer_size_source(input),
+ build_int_cst_type(INT, src_fmt),
+ build_int_cst_type(INT, dst_fmt),
+ NULL_TREE);
+ }
+
+void
+parser_module_name( cbl_field_t *tgt, module_type_t type )
+ {
+ SHOW_PARSE
+ {
+ SHOW_PARSE_HEADER
+ SHOW_PARSE_END
+ }
+ gg_call(VOID,
+ "__gg__module_name",
+ gg_get_address_of(tgt->var_decl_node),
+ build_int_cst_type(INT, type),
+ NULL_TREE);
+ }
+
+void
+parser_intrinsic_numval_c( cbl_field_t *f,
+ cbl_refer_t& input,
+ bool locale,
+ cbl_refer_t& currency,
+ bool anycase,
+ bool test_numval_c ) // true for TEST-NUMVAL-C
+ {
+ Analyze();
+ SHOW_PARSE
+ {
+ SHOW_PARSE_HEADER
+ SHOW_PARSE_END
+ }
+ TRACE1
+ {
+ TRACE1_HEADER
+ TRACE1_END
+ }
+ if( locale || anycase )
+ {
gcc_unreachable();
}
if( test_numval_c )
TRACE1_INDENT
TRACE1_REFER("parameter: ", ref1, "")
}
+
+ const charmap_t *charmap = __gg__get_charmap(ref1.field->codeset.encoding);
+ tree stride = gg_cast(LONG, integer_one_node);
+
+ switch(ref1.field->type)
+ {
+ case FldInvalid:
+ case FldGroup:
+ case FldAlphanumeric:
+ case FldNumericDisplay:
+ case FldNumericEdited:
+ case FldAlphaEdited:
+ case FldLiteralA:
+ stride = build_int_cst_type(LONG, charmap->stride());
+ break;
+
+ case FldNumericBinary:
+ case FldFloat:
+ case FldPacked:
+ case FldNumericBin5:
+ case FldLiteralN:
+ case FldClass:
+ case FldConditional:
+ case FldForward:
+ case FldIndex:
+ case FldSwitch:
+ case FldDisplay:
+ case FldPointer:
+ stride = gg_cast(LONG, integer_one_node);
+ break;
+ }
+
+ if( ref1.field->attr & hex_encoded_e )
+ {
+ stride = gg_cast(LONG, integer_one_node);
+ }
+
size_t upper = ref1.field->occurs.bounds.upper
? ref1.field->occurs.bounds.upper : 1;
if( ref1.nsubscript() )
"__gg__int128_to_field",
gg_get_address_of(tgt->var_decl_node),
gg_cast(INT128,
- gg_multiply(refer_size_source(ref1),
- depending_on)),
+ gg_divide(gg_multiply(refer_size_source(ref1),
+ depending_on),
+ stride)),
integer_zero_node,
build_int_cst_type(INT, truncation_e),
null_pointer_node,
"__gg__int128_to_field",
gg_get_address_of(tgt->var_decl_node),
gg_cast(INT128,
- refer_size_source(ref1)),
+ gg_divide(refer_size_source(ref1),
+ stride)),
integer_zero_node,
build_int_cst_type(INT, truncation_e),
null_pointer_node,
"__gg__int128_to_field",
gg_get_address_of(tgt->var_decl_node),
gg_cast(INT128,
- gg_multiply(refer_size_source(ref1),
- build_int_cst_type(SIZE_T, upper))),
+ gg_divide(gg_multiply(refer_size_source(ref1),
+ build_int_cst_type(LONG, upper)),
+ stride)),
integer_zero_node,
build_int_cst_type(INT, truncation_e),
null_pointer_node,
TRACE1_INDENT
TRACE1_REFER("parameter: ", ref1, "")
}
+ gg_get_address_of(tgt->var_decl_node);
+ gg_get_address_of(ref1.field->var_decl_node);
+ refer_offset(ref1);
+ refer_size_source(ref1);
+
gg_call(VOID,
function_name,
gg_get_address_of(tgt->var_decl_node),
{
crv = by_content_e;
gg_assign(location,
- gg_cast(UCHAR_P, build_string_literal(args[i].refer.field->data.capacity,
- args[i].refer.field->data.initial)));
+ gg_cast(UCHAR_P, build_string_literal(args[i].refer.field->data.capacity(),
+ args[i].refer.field->data.original())));
gg_assign(length,
build_int_cst_type( SIZE_T,
- args[i].refer.field->data.capacity));
+ args[i].refer.field->data.capacity()));
}
else
{
{
// All temporaries are SIZE_T
if( args[i].refer.field->type == FldFloat
- && args[i].refer.field->data.capacity == 16 )
+ && args[i].refer.field->data.capacity() == 16 )
{
as_int128 = true;
}
else if( args[i].refer.field->type == FldNumericBin5
&& args[i].refer.field->data.digits == 0
- && args[i].refer.field->data.capacity == 16 )
+ && args[i].refer.field->data.capacity() == 16 )
{
as_int128 = true;
}
// We expect the return value to be a 64-bit or 128-bit integer. How
// we treat that returned value depends on the target.
- // Pick up that value:
+ // Create a variable of the type expected from the called function
returned_value = gg_define_variable(returned_value_type);
+
+ // Actually call the function, assigning the returned value to that
+ // variable:
push_program_state();
gg_assign(returned_value, gg_cast(returned_value_type, call_expr));
pop_program_state();
+ // Now we decided what to do with the returned value, based on its type.
if( returned_value_type == CHAR_P )
{
- tree returned_location = gg_define_uchar_star();
- tree returned_length = gg_define_size_t();
- // we were given a returned::field, so find its location and length:
- gg_assign(returned_location,
- gg_add( member(returned.field->var_decl_node, "data"),
- refer_offset(returned)));
- gg_assign(returned_length,
- gg_cast(TREE_TYPE(returned_length), refer_size_dest(returned)));
-
- // The returned value is a string of nbytes, which by specification
- // has to be at least as long as the returned_length of the target:
- IF( returned_value,
- eq_op,
- gg_cast(returned_value_type, null_pointer_node ) )
- {
- // Somebody was discourteous enough to return a NULL pointer
- // We'll jam in spaces:
- charmap_t *charmap = __gg__get_charmap(returned.field->codeset.encoding);
- int dest_space = charmap->mapped_character(ascii_space);
- gg_memset( returned_location,
- char_nodes[(unsigned char)dest_space],
- returned_length );
- }
- ELSE
- {
- // There is a valid pointer. Do the assignment.
- move_tree(returned.field,
- refer_offset(returned),
- returned_value,
- integer_one_node);
- }
- ENDIF
+ // Let the library do the assignment of the 'char *returned_value' to the
+ // target 'refer returned'
+ gg_call(VOID,
+ "__gg__refer_from_psz",
+ gg_get_address_of(returned.field->var_decl_node),
+ refer_offset(returned),
+ refer_size_dest(returned),
+ returned_value,
+ NULL_TREE);
TRACE1
{
TRACE1_HEADER
}
else
{
+ // Getting here should be impossible; it means we didn't anticipate
+ // the type of the returned value:
cbl_internal_error(
"%s: What in the name of Nero are we doing here?",
__func__);
// for a simple byte-for-byte copy of the data areas:
bool moved = false;
if( destref.field->type == sourceref.field->type
- && destref.field->data.capacity == sourceref.field->data.capacity
+ && destref.field->data.capacity() == sourceref.field->data.capacity()
&& destref.field->data.digits == sourceref.field->data.digits
&& destref.field->data.rdigits == sourceref.field->data.rdigits
&& (destref.field->attr & (signable_e|separate_e|leading_e))
refer_offset(destref)),
gg_add(member(sourceref.field->var_decl_node, "data"),
tsource.offset),
- build_int_cst_type(SIZE_T, sourceref.field->data.capacity));
+ build_int_cst_type(SIZE_T, sourceref.field->data.capacity()));
moved = true;
}
}
}
// We need the data sent to __gg__psz_to_alpha_move to be in the
- // encoding of the destination
+ // encoding of the destination. In accordance with the rules of
+ // cbl_field_t::internalize, the FldLiteralN is in source-code
+ // encoding, so we have to convert.
size_t charsout;
const char *converted = __gg__iconverter(
- sourceref.field->codeset.encoding,
+ DEFAULT_SOURCE_ENCODING,
destref.field->codeset.encoding,
- sourceref.field->data.initial,
- strlen(sourceref.field->data.initial),
+ sourceref.field->data.original(),
+ strlen(sourceref.field->data.original()),
&charsout);
gg_call(VOID,
"__gg__psz_to_alpha_move",
gg_get_address_of(destref.field->var_decl_node),
refer_offset(destref),
refer_size_dest(destref),
- gg_string_literal(converted),
+ build_string_literal(charsout, converted),
build_int_cst_type(SIZE_T, charsout),
NULL_TREE);
moved = true;
SHOW_PARSE_TEXT("mh_source_is_literalN: pointer/index")
}
- if( sourceref.field->data.capacity < 8 )
+ if( sourceref.field->data.capacity() < 8 )
{
// There are too few bytes in sourceref
if( sourceref.field->attr & signable_e )
gg_get_indirect_reference(gg_add(member(sourceref.field->var_decl_node,
"data"),
build_int_cst_type(SIZE_T,
- sourceref.field->data.capacity-1)),
+ sourceref.field->data.capacity()-1)),
integer_zero_node));
IF( gg_bitwise_and(highbyte, build_int_cst_type(UCHAR, 0x80)),
eq_op,
gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"),
refer_offset(destref)),
gg_get_address_of(literalN_value),
- build_int_cst_type(SIZE_T, sourceref.field->data.capacity));
+ build_int_cst_type(SIZE_T, sourceref.field->data.capacity()));
moved = true;
break;
// For now, we are ignoring intermediates:
assert( !(destref.field->attr & intermediate_e) );
- int bytes_needed = std::max(destref.field->data.capacity,
- sourceref.field->data.capacity);
+ int bytes_needed = std::max(destref.field->data.capacity(),
+ sourceref.field->data.capacity());
tree calc_type = tree_type_from_size(bytes_needed,
sourceref.field->attr & signable_e);
- tree dest_type = tree_type_from_size( destref.field->data.capacity,
+ tree dest_type = tree_type_from_size( destref.field->data.capacity(),
destref.field->attr & signable_e);
// Pick up the source data.
}
// __gg__string_to_alpha_edited expects the source string to be in
- // the same encoding as the target:
- size_t len = strlen(sourceref.field->data.initial);
- char *src =
- static_cast<char *>(xmalloc(len+1));
- memcpy( src,
- sourceref.field->data.initial,
- strlen(sourceref.field->data.initial));
+ // the same encoding as the target. The rule in internalize is that
+ // a FldLiteralN::data.initial is left in source-code space, so it
+ // needs to be converted to the destination encoding.
size_t charsout;
- const char *converted = __gg__iconverter(
- sourceref.field->codeset.encoding,
+ const char *converted_ = __gg__iconverter(
+ DEFAULT_SOURCE_ENCODING,
destref.field->codeset.encoding,
- src,
- len,
+ sourceref.field->data.original(),
+ strlen(sourceref.field->data.original()),
&charsout);
+ // Copy converted, because __gg__string_to_alpha_edited might have its
+ // own reasons to use charmap_t, which could mess up the static buffer
+ // used by __gg__iconverter:
+ char *converted = xstrdup(converted_);
gg_call(VOID,
"__gg__string_to_alpha_edited",
gg_add( member(destref.field->var_decl_node, "data"),
refer_offset(destref) ),
build_int_cst_type(INT, destref.field->codeset.encoding),
gg_string_literal(converted),
- build_int_cst_type(INT, len),
+ build_int_cst_type(INT, strlen(converted)),
gg_string_literal(destref.field->data.picture),
NULL_TREE);
- free(src);
moved = true;
+ free(converted);
break;
}
{
tree tdest = gg_add(member(destref.field->var_decl_node, "data"),
refer_offset(destref) );
- switch( destref.field->data.capacity )
+ switch( destref.field->data.capacity() )
{
- // For some reason, using FLOAT128 in the build_pointer_type causes
- // a SEGFAULT. So, we'll use other types with equivalent sizes. I
- // am speculating that the use of floating-point types causes the -O0
- // compilation to move things using the mmx registers. So, I am using
- // intxx types in the hope that they are simpler.
case 4:
{
// The following generated code is the exact equivalent
float_type_of(const cbl_field_t *field)
{
gcc_assert(field->type == FldFloat);
- return float_type_of(field->data.capacity);
+ return float_type_of(field->data.capacity());
}
static tree
case FldNumericEdited:
case FldPacked:
{
- switch( destref.field->data.capacity )
+ switch( destref.field->data.capacity() )
{
case 4:
gg_call(VOID,
// If the destination is bigger than the source, then we can
// do an untested move:
- if( destref.field->data.capacity >= sourceref.field->data.capacity )
+ if( destref.field->data.capacity() >= sourceref.field->data.capacity() )
{
tree dtype = float_type_of(&destref);
tree stype = float_type_of(&sourceref);
else
{
// There are only three possible moves left:
- if(destref.field->data.capacity == 8 )
+ if(destref.field->data.capacity() == 8 )
{
if( size_error )
{
else
{
// The destination has to be float32
- if( sourceref.field->data.capacity == 8 )
+ if( sourceref.field->data.capacity() == 8 )
{
if( size_error )
{
case FldLiteralA:
case FldAlphanumeric:
+ case FldGroup:
{
// Alphanumeric to float is inherently slow. Send it off to the library
break;
{
bool moved = false;
+ charmap_t *charmap_source =
+ __gg__get_charmap(sourceref.field->codeset.encoding);
if( destref.field->type == FldNumericDisplay
&& sourceref.field->type == FldNumericDisplay
&& !(destref.field->attr & scaled_e)
- && !(sourceref.field->attr & scaled_e) )
+ && !(sourceref.field->attr & scaled_e)
+ && charmap_source->stride() == 1
+ && sourceref.field->codeset.encoding == destref.field->codeset.encoding
+ )
{
+ // We can do simple moves of single-byte same-encoding numeric display.
+ // More complex ones get sent to __gg__move
+
Analyze();
// I believe that there are 450 pathways through the following code.
// That's because there are five different valid combination of signable_e,
// nybble is 0xC0 for positive values, and 0xD0 for negative; all other
// digits are 0x70.
- charmap_t *charmap_source =
- __gg__get_charmap(sourceref.field->codeset.encoding);
charmap_t *charmap_dest =
- __gg__get_charmap( destref.field->codeset.encoding);
+ __gg__get_charmap( destref.field->codeset.encoding);
static tree source_sign_loc = gg_define_variable(UCHAR_P,
"..mhnd_sign_loc",
gg_assign(source_sign_loc,
gg_add(source_p,
build_int_cst_type(SIZE_T,
- sourceref.field->data.capacity-1)));
+ sourceref.field->data.capacity()-1)));
break;
case 5:
// signable, not leading, separate
gg_assign(source_sign_loc,
gg_add(source_p,
build_int_cst_type(SIZE_T,
- sourceref.field->data.capacity-1)));
+ sourceref.field->data.capacity()-1)));
break;
case 6:
// signable, leading, not separate
gg_assign(dest_sign_loc,
gg_add(dest_p,
build_int_cst_type(SIZE_T,
- destref.field->data.capacity-1)));
+ destref.field->data.capacity()-1)));
break;
case 5:
// signable, not leading, separate
gg_assign(dest_sign_loc,
gg_add(dest_p,
build_int_cst_type(SIZE_T,
- destref.field->data.capacity-1)));
+ destref.field->data.capacity()-1)));
break;
case 6:
// signable, leading, not separate
{
bool moved = false;
- cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial);
+ cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.original());
if( !figconst
&& !(destref.field->attr & scaled_e)
&& !(destref.field->attr & (intermediate_e ))
&& !(sourceref.field->attr & (intermediate_e ))
+ && sourceref.field->type != FldGroup
&& sourceref.field->type != FldLiteralA
&& sourceref.field->type != FldAlphanumeric
&& sourceref.field->type != FldNumericEdited
const TREEPLET &tsrc)
{
bool retval = false;
- if( sourceref.field->type == FldGroup && !(destref.field->attr & rjust_e) )
+ charmap_t *charmap = __gg__get_charmap(destref.field->codeset.encoding);
+ if( sourceref.field->type == FldGroup && !(destref.field->attr & rjust_e)
+ && sourceref.field->codeset.encoding == destref.field->codeset.encoding
+ && charmap->stride() == 1)
{
Analyze();
// We are moving a group to a something. The rule here is just move as
ELSE
{
// There are too-few source bytes:
- charmap_t *charmap = __gg__get_charmap(destref.field->codeset.encoding);
int dest_space = charmap->mapped_character(ascii_space);
gg_memset(tdest, build_int_cst_type(INT, dest_space), dbytes);
gg_memcpy(tdest, tsource, sbytes);
// construct a string with the same number of characters as the source, but
// in the target variable's encoding.
- // We will then call a library routine that will be in charge of trimming
- // and space filling.
+ // We will then call a library routine that will be in charge of run-time
+ // trimming or space filling, as necessary.
cbl_encoding_t encoding_dest = destref.field->codeset.encoding;
charmap_t *charmap_dest = __gg__get_charmap(encoding_dest);
static char *buffer = NULL;
static size_t buffer_size = 0;
- size_t source_length = sourceref.field->data.capacity;
+ size_t source_length;
+ size_t dest_length;
+ if( sourceref.field->attr & hex_encoded_e )
+ {
+ // Hex-encoded data is moved as-is
+ source_length = sourceref.field->data.capacity();
+ dest_length = std::min(source_length,
+ static_cast<size_t>(destref.field->data.capacity()));
+ }
+ else
+ {
+ // Otherwise, data.initial prevails:
+ size_t source_based_on_strlen = strlen(sourceref.field->data.original());
+ size_t source_based_on_capacity = sourceref.field->data.capacity() /
+ sourceref.field->codeset.stride() ;
+ source_length = std::max( source_based_on_strlen ,
+ source_based_on_capacity );
+ dest_length = source_length * charmap_dest->stride();
+ }
- if( buffer_size < source_length )
+ if( buffer_size < dest_length )
{
- buffer_size = source_length;
- buffer = static_cast<char *>(xrealloc(buffer, source_length));
+ buffer_size = dest_length;
+ buffer = static_cast<char *>(xrealloc(buffer, buffer_size));
}
gcc_assert(buffer);
- cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.initial);
+ cbl_figconst_t figconst = cbl_figconst_of( sourceref.field->data.original());
+ size_t outlength;
if( figconst )
{
// We are going to fill 'buffer' with a solid run of the figurative
{
case normal_value_e :
// This is not possible, it says here in the fine print.
- abort();
+ gcc_unreachable();
break;
case low_value_e :
const_char = charmap_dest->low_value_character();
}
else
{
- // We are going to convert the source string to the destination codeset,
- // and then copy it to 'buffer', trimming if necessary, and space-filling
- // to the right if necessary:
- cbl_encoding_t encoding_src = sourceref.field->codeset.encoding;
-
- size_t outlength;
- const char *source_string = __gg__iconverter( encoding_src,
- encoding_dest,
- sourceref.field->data.initial,
- source_length,
- &outlength );
- // Copy over the converted string
- memcpy( buffer,
- source_string,
- outlength );
+ if( sourceref.field->attr & hex_encoded_e )
+ {
+ // hex_encoded data goes as is:
+ memcpy(buffer, sourceref.field->data.original(), dest_length);
+ outlength = dest_length;
+ }
+ else
+ {
+ // We are going to convert the source string to the destination
+ // codeset, and then copy it to 'buffer', trimming if necessary, and
+ // space-filling to the right if necessary:
+ const char *source_string =
+ __gg__iconverter(
+ sourceref.field->codeset.default_encodings.source->type,
+ encoding_dest,
+ sourceref.field->data.original(),
+ source_length,
+ &outlength );
+ if( outlength > dest_length )
+ {
+ outlength = dest_length;
+ }
+ // Copy over the converted string
+ memcpy( buffer,
+ source_string,
+ outlength );
+ }
}
// If the source is flagged ALL, or if we are setting the destination to
// a figurative constant, pass along the ALL bit:
int rounded_parameter = rounded
- | ((sourceref.all || figconst ) ? REFER_ALL_BIT : 0);
+ | ((sourceref.all || figconst ) ? REFER_ALL_BIT : 0);
if( size_error )
{
refer_offset(destref),
refer_size_dest(destref),
build_int_cst_type(INT, rounded_parameter),
- build_string_literal(source_length,
+ build_string_literal(outlength,
buffer),
- build_int_cst_type( SIZE_T, source_length),
+ build_int_cst_type( SIZE_T, outlength),
NULL_TREE));
}
else
refer_offset(destref),
refer_size_dest(destref),
build_int_cst_type(INT, rounded_parameter),
- build_string_literal(source_length,
+ build_string_literal(outlength,
buffer),
- build_int_cst_type( SIZE_T, source_length),
+ build_int_cst_type( SIZE_T, outlength),
NULL_TREE);
}
if( destref.refmod.from
first_time = false;
gg_assign(stash, gg_cast(UCHAR_P, gg_malloc(stash_size)));
}
- if( stash_size < destref.field->data.capacity )
+ if( stash_size < destref.field->data.capacity() )
{
- stash_size = destref.field->data.capacity;
+ stash_size = destref.field->data.capacity();
gg_assign(stash, gg_cast(UCHAR_P, gg_realloc(stash, stash_size)));
}
st_data = qualified_data_location(destref);
size_error);
}
- if( !moved && sourceref.field->type == FldLiteralA)
+ if( !moved )
{
moved = mh_source_is_literalA(destref,
sourceref,
return pow10;
}
-static
-char *
-binary_initial(cbl_field_t *field)
- {
- // This routine returns an xmalloced buffer designed to replace the
- // data.initial member of the incoming field
- char *retval = NULL;
-
- uint32_t capacity;
- uint32_t ddigits;
- int32_t drdigits;
- uint64_t attr;
- FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
- capacity,
- ddigits,
- drdigits,
- attr);
- int scaled_rdigits = get_scaled_rdigits(field);
-
- int i = field->data.rdigits;
- while( i<0 )
- {
- value128 = value128/10;
- i += 1;
- }
-
- // We take the digits of value128, and put them into ach. We line up
- // the rdigits, and we truncate the string after desired_digits
- while(drdigits < scaled_rdigits)
- {
- value128 *= 10;
- drdigits += 1;
- }
- while(drdigits > scaled_rdigits)
- {
- value128 = value128 / 10;
- drdigits -= 1;
- }
-
- retval = static_cast<char *>(xmalloc(field->data.capacity));
- gcc_assert(retval);
- switch(field->data.capacity)
- {
- tree type;
- case 1:
- case 2:
- case 4:
- case 8:
- case 16:
- type = build_nonstandard_integer_type ( field->data.capacity
- * BITS_PER_UNIT, 0);
- native_encode_wide_int (type, value128, PTRCAST(unsigned char, retval),
- field->data.capacity);
- break;
- default:
- fprintf(stderr,
- "Trouble in binary_initial at %s() %s:%d\n",
- __func__,
- __FILE__,
- __LINE__);
- abort();
- break;
- }
-
- return retval;
- }
-
-static void
-digits_from_int128( char *ach,
- cbl_field_t *field,
- uint32_t desired_digits,
- FIXED_WIDE_INT(128) value128, // cppcheck-suppress unknownMacro
- int32_t rdigits)
- {
- if( value128 < 0 )
- {
- value128 = -value128;
- }
-
- // 'rdigits' are the number of rdigits in value128.
-
- int scaled_rdigits = get_scaled_rdigits(field);
-
- int i = field->data.rdigits;
- while( i<0 )
- {
- value128 = value128/10;
- i += 1;
- }
-
- // We take the digits of value128, and put them into ach. We line up
- // the rdigits, and we truncate the string after desired_digits
- while(rdigits < scaled_rdigits)
- {
- value128 *= 10;
- rdigits += 1;
- }
- while(rdigits > scaled_rdigits)
- {
- value128 = value128 / 10;
- rdigits -= 1;
- }
- char conv[128];
- print_dec (value128, conv, SIGNED);
- size_t len = strlen(conv);
-
- if( len<desired_digits )
- {
- memset(ach, ascii_0, desired_digits - len);
- strcpy(ach+desired_digits - len, conv);
- }
- else
- {
- strcpy(ach, conv + len-desired_digits);
- }
- }
-
-#if 0
-// This routine was replaced with digits_from_int1289. However, I am choosing
-// to keep it around for a while, because it is a master class in manipulating
-// REAL_VALUE_TYPE and FIXED_WIDE_INT
-
-static void
-digits_from_float128(char *retval, cbl_field_t *field, size_t width, int rdigits, REAL_VALUE_TYPE value)
- {
- char ach[128];
-
- // We need to adjust value so that it has no decimal places
- if( rdigits )
- {
- REAL_VALUE_TYPE pow10 = real_powi10 (rdigits);
- real_arithmetic (&value, MULT_EXPR, &value, &pow10);
- }
- // We need to make sure that the resulting string will fit into
- // a number with 'digits' digits
- REAL_VALUE_TYPE pow10 = real_powi10 (field->data.digits);
- mpfr_t m0, m1;
-
- mpfr_inits2 (FLOAT_MODE_FORMAT (TYPE_MODE (float128_type_node))->p, m0, m1,
- NULL);
- mpfr_from_real (m0, &value, MPFR_RNDN);
- mpfr_from_real (m1, &pow10, MPFR_RNDN);
- mpfr_clear_flags ();
- mpfr_fmod (m0, m0, m1, MPFR_RNDN);
- real_from_mpfr (&value, m0,
- REAL_MODE_FORMAT (TYPE_MODE (float128_type_node)),
- MPFR_RNDN);
- real_convert (&value, TYPE_MODE (float128_type_node), &value);
- mpfr_clears (m0, m1, NULL);
- real_roundeven (&value, TYPE_MODE (float128_type_node), &value);
-
- bool fail = false;
- FIXED_WIDE_INT(128) i
- = FIXED_WIDE_INT(128)::from (real_to_integer (&value, &fail, 128), SIGNED);
-
- // We convert it to a integer string of digits:
- print_dec (i, ach, SIGNED);
-
- gcc_assert( strlen(ach) <= field->data.digits );
- if( strlen(ach) < width )
- {
- memset(retval, '0', width-strlen(ach) );
- }
- strcpy(retval + (width-strlen(ach)), ach);
- }
-#endif
-
-static char *
-initial_from_initial(cbl_field_t *field)
+static tree
+convert_data_initial(cbl_field_t * field)
{
- Analyze();
- // This routine returns an xmalloced buffer that is intended to replace the
- // data.initial member of the incoming field.
-
- //fprintf(stderr, " %s\n", field->name);
-
- char *retval = NULL;
-
- // Let's handle the possibility of a figurative constant
- cbl_figconst_t figconst = cbl_figconst_of(field->data.initial);
- if( figconst )
- {
- charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
- int const_char = charmap->figconst_character(figconst);
- bool set_return = figconst != zero_value_e;
- if( !set_return )
- {
- // The figconst is zero
- switch(field->type)
- {
- case FldGroup:
- case FldAlphanumeric:
- set_return = true;
- break;
-
- default:
- break;
- }
- }
- if( set_return )
- {
- retval = static_cast<char *>(xmalloc(field->data.capacity+1));
- gcc_assert(retval);
- memset(retval, const_char, field->data.capacity);
- retval[field->data.capacity] = '\0';
- return retval;
- }
- }
-
- // ??? Refactoring the cases below that do not need 'value' would
- // make this less ugly
- REAL_VALUE_TYPE value;
- if( field->data.etc_type == cbl_field_data_t::value_e )
- value = TREE_REAL_CST (field->data.value_of ());
-
-#if 0
- int rdigits;
- // There is always the infuriating possibility of a P-scaled number
- if( field->attr & scaled_e )
- {
- rdigits = 0;
- if( field->data.rdigits >= 0 )
- {
- // Suppose our PIC is PPPPPP999, meaning that field->digits
- // is 3, and field->rdigits is 6.
-
- // Our result has no decimal places, and we have to multiply the value
- // by 10**9 to get the significant bdigits where they belong.
-
- REAL_VALUE_TYPE pow10
- = real_powi10 (field->data.digits + field->data.rdigits);
- real_arithmetic (&value, MULT_EXPR, &value, &pow10);
- }
- else
- {
- // Suppose our target is 999PPPPPP, so there is a ->digits
- // of 3 and field->rdigits of -6.
-
- // If our caller gave us 123000000, we need to divide
- // it by 1000000 to line up the 123 with where we want it to go:
-
- REAL_VALUE_TYPE pow10 = real_powi10 (-field->data.rdigits);
- real_arithmetic (&value, RDIV_EXPR, &value, &pow10);
- }
- // Either way, we now have everything aligned for the remainder of the
- // processing to work:
- }
- else
- {
- // Not P-scaled
- rdigits = field->data.rdigits;
- }
-#endif
-
- switch(field->type)
- {
- case FldNumericBin5:
- case FldIndex:
- retval = binary_initial(field);
- break;
-
- case FldNumericBinary:
- {
- retval = binary_initial(field);
- size_t left = 0;
- size_t right = field->data.capacity - 1;
- while(left < right)
- {
- std::swap(retval[left++], retval[right--]);
- }
- break;
- }
-
- case FldNumericDisplay:
- {
- charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
-
- retval = static_cast<char *>(xmalloc(field->data.capacity));
- gcc_assert(retval);
- char *pretval = retval;
- char ach[128];
-
- bool negative;
- if( real_isneg (&value) )
- {
- negative = true;
- value = real_value_negate (&value);
- }
- else
- {
- negative = false;
- }
-
- // Convert the data.initial to a __int128
- uint32_t capacity;
- uint32_t ddigits;
- int32_t drdigits;
- uint64_t attr;
- FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
- capacity,
- ddigits,
- drdigits,
- attr);
- digits_from_int128(ach, field, field->data.digits, value128, drdigits);
-
- const char *digits = ach;
- if( (field->attr & signable_e)
- && (field->attr & separate_e)
- && (field->attr & leading_e ) )
- {
- // This zoned decimal value is signable, separate, and leading.
- if( negative )
- {
- *pretval++ = charmap->mapped_character(ascii_minus);
- }
- else
- {
- *pretval++ = charmap->mapped_character(ascii_plus);
- }
- }
- for(size_t i=0; i<field->data.digits; i++)
- {
- // Start by assuming it's an value that can't be signed
- *pretval++ = charmap->mapped_character(ascii_0) + ((*digits++) & 0x0F);
- }
- if( (field->attr & signable_e)
- && (field->attr & separate_e)
- && !(field->attr & leading_e ) )
- {
- // The value is signable, separate, and trailing
- if( negative )
- {
- *pretval++ = charmap->mapped_character(ascii_minus);
- }
- else
- {
- *pretval++ = charmap->mapped_character(ascii_plus);
- }
- }
- if( (field->attr & signable_e)
- && !(field->attr & separate_e) )
- {
- // This value is signable, and not separate. So, the sign information
- // goes into the first or last byte:
- char *sign_location = field->attr & leading_e ?
- retval : retval + field->data.digits - 1 ;
- *sign_location = charmap->set_digit_negative(*sign_location,
- negative);
- }
- break;
- }
-
- case FldPacked:
- {
- retval = static_cast<char *>(xmalloc(field->data.capacity));
- gcc_assert(retval);
- char *pretval = retval;
- char ach[128];
-
- bool negative;
- if( real_isneg (&value) )
- {
- negative = true;
- value = real_value_negate (&value);
- }
- else
- {
- negative = false;
- }
-
- // For COMP-6 (flagged by separate_e), the number of required digits is
- // twice the capacity.
-
- // For COMP-3, the number of digits is 2*capacity minus 1, because the
- // the final "digit" is a sign nybble.
-
- size_t ndigits = (field->attr & separate_e)
- ? field->data.capacity * 2
- : field->data.capacity * 2 - 1;
- uint32_t capacity;
- uint32_t ddigits;
- int32_t drdigits;
- uint64_t attr;
- FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.initial,
- capacity,
- ddigits,
- drdigits,
- attr);
- digits_from_int128(ach, field, ndigits, value128, drdigits);
-
- const char *digits = ach;
- for(size_t i=0; i<ndigits; i++)
- {
- if( !(i & 0x01) )
- {
- *pretval = ((*digits++) & 0x0F)<<4;;
- }
- else
- {
- *pretval++ += (*digits++) & 0x0F;
- }
- }
- if( !(field->attr & separate_e) )
- {
- // This is COMP-3, so put in a sign nybble
- if( (field->attr & signable_e) )
- {
- if( negative )
- {
- *pretval++ += 0x0D; // Means signable and negative
- }
- else
- {
- *pretval++ += 0x0C; // Means signable and non-negative
- }
- }
- else
- {
- *pretval++ += 0x0F; // Means not signable
- }
- }
- break;
- }
+ // This routine returns a tree from field->data.initial, extended with
+ // a NUL on the end.
+ size_t buffer_size = field->data.capacity() + field->codeset.stride();
+ char *buffer = static_cast<char *>(xmalloc(buffer_size));
+ gcc_assert(buffer);
- case FldGroup:
- case FldAlphanumeric:
- case FldLiteralA:
- case FldAlphaEdited:
- {
- if( field->data.initial )
- {
- retval = static_cast<char *>(xmalloc(field->data.capacity+1));
- gcc_assert(retval);
- if( field->attr & hex_encoded_e)
- {
- memcpy(retval, field->data.initial, field->data.capacity);
- }
- else
- {
- size_t length = field->data.capacity;
- memcpy(retval, field->data.initial, length);
- if( strlen(field->data.initial) < length )
- {
- // If this is true, then the initial string must've been Z'xyz'
- retval[strlen(field->data.initial)] = '\0';
- }
- }
- retval[field->data.capacity] = '\0';
- }
- break;
- }
+ size_t nbytes = field->data.capacity();
- case FldNumericEdited:
- {
- charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
- retval = static_cast<char *>(xmalloc(field->data.capacity+1));
- gcc_assert(retval);
- if( field->data.initial && field->attr & quoted_e )
- {
- // What the programmer says the value is, the value becomes, no
- // matter how wrong it might be.
- size_t length = std::min( (size_t)field->data.capacity,
- strlen(field->data.initial));
- for(size_t i=0; i<length; i++)
- {
- retval[i] = field->data.initial[i];
- }
- if( length < (size_t)field->data.capacity )
- {
- memset( retval+length,
- charmap->mapped_character(ascii_space),
- (size_t)field->data.capacity - length);
- }
- }
- else
- {
- // It's not a quoted string, so we use data.value:
- bool negative;
- if( real_isneg (&value) )
- {
- negative = true;
- value = real_value_negate (&value);
- }
- else
- {
- negative = false;
- }
+ const char *converted = field->data.initial;
- char ach[128];
- memset(ach, 0, sizeof(ach));
- memset(retval, 0, field->data.capacity);
+ // Copy the converted bytes
+ gcc_assert(nbytes < buffer_size);
+ memcpy(buffer, converted, nbytes);
+ charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
- if( (field->attr & blank_zero_e) && real_iszero (&value) )
- {
- memset( retval,
- charmap->mapped_character(ascii_space),
- field->data.capacity);
- }
- else
- {
- size_t ndigits = field->data.capacity;
- uint32_t capacity;
- uint32_t ddigits;
- int32_t drdigits;
- uint64_t attr;
- FIXED_WIDE_INT(128) value128 = dirty_to_binary(field->data.original(),
- capacity,
- ddigits,
- drdigits,
- attr);
- digits_from_int128(ach, field, ndigits, value128, drdigits);
-
- // __gg__string_to_numeric_edited operates in ASCII space:
- __gg__string_to_numeric_edited( retval,
- ach,
- field->data.rdigits,
- negative,
- field->data.picture);
- // So now we convert it to the target encoding:
- size_t nbytes;
- const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
- field->codeset.encoding,
- retval,
- strlen(retval),
- &nbytes);
- strcpy(retval, converted);
- }
- }
- break;
- }
+ // Tack on a final NUL
+ charmap->putch(0, buffer, nbytes);
- case FldFloat:
- {
- retval = static_cast<char *>(xmalloc(field->data.capacity));
- gcc_assert(retval);
- switch( field->data.capacity )
- {
- case 4:
- value = real_value_truncate (TYPE_MODE (FLOAT), value);
- native_encode_real (SCALAR_FLOAT_TYPE_MODE (FLOAT), &value,
- PTRCAST(unsigned char, retval), 4, 0);
- break;
- case 8:
- value = real_value_truncate (TYPE_MODE (DOUBLE), value);
- native_encode_real (SCALAR_FLOAT_TYPE_MODE (DOUBLE), &value,
- PTRCAST(unsigned char, retval), 8, 0);
- break;
- case 16:
- value = real_value_truncate (TYPE_MODE (FLOAT128), value);
- native_encode_real (SCALAR_FLOAT_TYPE_MODE (FLOAT128), &value,
- PTRCAST(unsigned char, retval), 16, 0);
- break;
- }
- break;
- }
-
- case FldLiteralN:
- {
- // This requires annotation.
-
- // The compiler originally used ASCII for field->data.initial. Later we
- // expanded the field with the addition of the codeset.encoding
- // For consistency in the parser processing, the FldLiteralN is arriving
- // with the Object-Computer's character encoding, and field->data.initial
- // is showing up encoded.
-
- // But on the run-time side, if the initial string is needed, it is
- // invariably more useful in ASCII. Consider converting that string to
- // a floating-point value, for example.
-
- // So, we are going to convert the data.initial string back to ASCII
- // here. Later on, when we establish the run-time encoding, we will
- // check for FldLiteralN and set that to ASCII as well. See
- // actually_create_the_static_field().
-
- size_t nbytes;
- const char *converted = __gg__iconverter(field->codeset.encoding,
- DEFAULT_SOURCE_ENCODING,
- field->data.initial,
- strlen(field->data.initial),
- &nbytes);
- retval = static_cast<char *>(xmalloc(strlen(field->data.initial)+1));
- gcc_assert(retval);
- strcpy(retval, converted);
- break;
- }
-
- default:
- break;
- }
+ tree retval = build_string_literal( buffer_size,
+ buffer);
+ free(buffer);
return retval;
}
CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
next_field,
build_int_cst_type( SIZE_T,
- new_var->data.capacity) );
+ new_var->data.capacity()) );
next_field = TREE_CHAIN(next_field);
// SIZE_T, "allocated",
CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
next_field,
build_int_cst_type( SIZE_T,
- new_var->data.capacity) );
+ new_var->data.capacity()) );
}
else
{
next_field = TREE_CHAIN(next_field);
// SIZE_T, "offset",
-
- if( new_var->type == FldAlphanumeric &&
- new_var->attr & intermediate_e )
- {
- // This is in support of FUNCTION TRIM. That function can make the capacity
- // of the intermediate target smaller so that TRIM("abc ") returns
- // "abc". By putting the capacity here for such variables, we have a
- // mechanism for restoring it the capacity to the original.
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
- next_field,
- build_int_cst_type(SIZE_T, new_var->data.capacity));
- }
- else
- {
- CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
+ CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
next_field,
build_int_cst_type(SIZE_T, new_var->offset) );
- }
next_field = TREE_CHAIN(next_field);
next_field = TREE_CHAIN(next_field);
// CHAR_P, "initial",
- if( length_of_initial_string == 0 )
+ if( length_of_initial_string == 0 || !new_var->data.has_initial_value() )
{
CONSTRUCTOR_APPEND_ELT( CONSTRUCTOR_ELTS(constr),
next_field,
// INT, "encoding",
// For FldLiteralN we force the encoding to be ASCII.
// See initial_from_initial() for an explanation.
+ // For FldClass, we force the encoding to be UTF32; see
+
+ cbl_encoding_t encoding;
+ if( new_var->type == FldLiteralN )
+ {
+ encoding = new_var->codeset.default_encodings.source->type;
+ }
+ else if( new_var->type == FldClass )
+ {
+ encoding = DEFAULT_32_ENCODING;
+ }
+ else
+ {
+ encoding = new_var->codeset.encoding;
+ }
+
CONSTRUCTOR_APPEND_ELT(CONSTRUCTOR_ELTS(constr),
next_field,
- build_int_cst_type(INT,
- new_var->type == FldLiteralN ?
- DEFAULT_SOURCE_ENCODING
- : new_var->codeset.encoding));
+ build_int_cst_type(INT, encoding));
next_field = TREE_CHAIN(next_field);
// INT, "alphabet",
// to do a linear search of the symbol table for each symbol
if( !our_index
- && new_var->type != FldLiteralN
+ && ! new_var->is_numeric_constant()
&& !(new_var->attr & intermediate_e))
{
our_index = field_index(new_var);
return new_var_decl;
}
-#if 1
static void
psa_FldLiteralA(struct cbl_field_t *field )
{
}
// We are constructing a completely static constant structure. We know the
- // capacity. We'll create it from the data.initial. The cblc_field_t:data
- // will be a copy of the .initial data. The var_decl_node will be an ordinary
- // cblc_field_t, which means that at this point in time, a FldLiteralA can be
- // used anywhere a FldGroup or FldAlphanumeric can be used. We are counting
- // on the parser not allowing a FldLiteralA to be a left-hand-side variable.
-
- // First make room
- static size_t buffer_size = 1024;
- static char *buffer = static_cast<char *>(xmalloc(buffer_size));
- if( buffer_size < field->data.capacity+1 )
- {
- buffer_size = field->data.capacity+1;
- buffer = static_cast<char *>(xrealloc(buffer, buffer_size));
- }
- gcc_assert(buffer);
-
- cbl_figconst_t figconst = cbl_figconst_of( field->data.initial );
- gcc_assert(figconst == normal_value_e);
-
- memcpy(buffer, field->data.initial, field->data.capacity);
- buffer[field->data.capacity] = '\0';
+ // capacity. We'll create it from the data.initial.
- // We have the original nul-terminated text at data.initial. We have a
- // copy of it in buffer[] in the internal codeset.
+ tree converted = convert_data_initial(field);
static const char name_base[] = "_literal_a_";
- // We will reuse a single static structure for each string
- static std::unordered_map<std::string, int> seen_before;
-
- std::string field_string(buffer);
-
-#if 0
- /* This code is suppoed to re-use literals, and seems to work just fine in
- x86_64-linux and on an Apple aarch64 M1 Macbook Pro. But on an M1
- mini, using -Os optimization, attempts were made in the generated
- assembly language to define _literal_a_1 more than once.
-
- I didn't know how to try to track this one down, so I decided simply to
- punt by removing the code.
+ static int nvar = 0;
+ nvar += 1;
- I am leaving the code here because of a conviction that it someday should
- be tracked down. */
-
- std::unordered_map<std::string, int>::const_iterator it =
- seen_before.find(field_string);
-
- if( it != seen_before.end() )
- {
- // We've seen that string before.
- int nvar = it->second;
- char ach[32];
- sprintf(ach, "%s%d", name_base, nvar);
- field->var_decl_node = gg_declare_variable(cblc_field_type_node,
- ach,
- NULL,
- vs_file_static);
- }
- else
-#endif
- {
- // We have not seen that string before
- static int nvar = 0;
- nvar += 1;
- seen_before[field_string] = nvar;
-
- char ach[32];
- sprintf(ach, "%s%d", name_base, nvar);
- field->var_decl_node = gg_define_variable( cblc_field_type_node,
- ach,
- vs_file_static);
- actually_create_the_static_field(
- field,
- build_string_literal(field->data.capacity,
- buffer),
- field->data.capacity,
- field->data.initial,
- NULL_TREE,
- field->var_decl_node);
- TREE_READONLY(field->var_decl_node) = 1;
- TREE_USED(field->var_decl_node) = 1;
- TREE_STATIC(field->var_decl_node) = 1;
- DECL_PRESERVE_P (field->var_decl_node) = 1;
- }
+ char ach[32];
+ sprintf(ach, "%s%d", name_base, nvar);
+ field->var_decl_node = gg_define_variable( cblc_field_type_node,
+ ach,
+ vs_file_static);
+ actually_create_the_static_field(
+ field,
+ converted,
+ strlen(field->data.original())+1,
+ field->data.original(),
+ NULL_TREE,
+ field->var_decl_node);
+ TREE_READONLY(field->var_decl_node) = 1;
+ TREE_USED(field->var_decl_node) = 1;
+ TREE_STATIC(field->var_decl_node) = 1;
+ DECL_PRESERVE_P (field->var_decl_node) = 1;
}
-#endif
void
parser_local_add(struct cbl_field_t *new_var )
if( new_var->level == LEVEL01 || new_var->level == LEVEL77)
{
// We need to allocate memory on the stack for this variable
- tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity);
+ tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity());
tree data_decl_node = gg_define_variable( array_type,
NULL,
vs_stack);
Analyze();
SHOW_PARSE
{
- do
- {
- fprintf(stderr, "( %d ) %s:", CURRENT_LINE_NUMBER, __func__);
- }
- while(0);
+ char ach[1024];
+ SHOW_PARSE_HEADER
- fprintf(stderr, " %2.2u %s<%s> off:" HOST_SIZE_T_PRINT_UNSIGNED " "
+ sprintf(ach, " %2.2u %s<%s> off:" HOST_SIZE_T_PRINT_UNSIGNED " "
"msiz:%u cap:%u dig:%u rdig:%d attr:0x" HOST_SIZE_T_PRINT_HEX_PURE " loc:%p",
new_var->level,
new_var->name,
cbl_field_type_str(new_var->type),
(fmt_size_t)new_var->offset,
new_var->data.memsize,
- new_var->data.capacity,
+ new_var->data.capacity(),
new_var->data.digits,
new_var->data.rdigits,
(fmt_size_t)new_var->attr,
static_cast<void*>(new_var));
+ SHOW_PARSE_TEXT(ach)
if( is_table(new_var) )
{
- fprintf(stderr," OCCURS:" HOST_SIZE_T_PRINT_DEC,
+ sprintf(ach, " OCCURS:" HOST_SIZE_T_PRINT_DEC,
(fmt_size_t)new_var->occurs.ntimes());
+ SHOW_PARSE_TEXT(ach)
}
const cbl_field_t *parent = parent_of(new_var);
if( parent )
{
- fprintf(stderr,
+ sprintf(ach,
" parent:(" HOST_SIZE_T_PRINT_DEC ")%s",
(fmt_size_t)new_var->parent,
parent->name);
+ SHOW_PARSE_TEXT(ach)
}
else
{
const symbol_elem_t *e = symbol_at(parent_index);
if( e->type == SymFile )
{
- fprintf(stderr,
+ sprintf(ach,
" parent_file:(" HOST_SIZE_T_PRINT_DEC ")%s",
(fmt_size_t)new_var->parent,
e->elem.file.name);
+ SHOW_PARSE_TEXT(ach)
if( e->elem.file.attr & external_e )
{
- fprintf(stderr, " (flagged external)");
+ sprintf(ach, " (flagged external)");
+ SHOW_PARSE_TEXT(ach)
}
}
}
if( symbol_redefines(new_var) )
{
- fprintf(stderr,
+ sprintf(ach,
" redefines:(%p)%s",
static_cast<void*>(symbol_redefines(new_var)),
symbol_redefines(new_var)->name);
+ SHOW_PARSE_TEXT(ach)
}
+ if( new_var->type == FldGroup
+ || new_var->type == FldAlphanumeric
+ || new_var->type == FldNumericEdited
+ || new_var->type == FldAlphaEdited
+ || new_var->type == FldLiteralA
+ )
+ {
+ if( new_var->data.initial && new_var->data.capacity() )
+ {
+ SHOW_PARSE_INDENT
+ for(size_t i=0; i<new_var->data.capacity(); i++)
+ {
+ fprintf(stderr, "%2.2X ", static_cast<unsigned char>(new_var->data.initial[i]));
+ }
+ }
+ }
+ if( new_var->data.original() && strlen(new_var->data.original()) )
+ {
+ SHOW_PARSE_INDENT
+ sprintf(ach,
+ "\"%s\" (%d)",
+ new_var->data.original(),
+ static_cast<int>(strlen(new_var->data.original())));
+ SHOW_PARSE_TEXT(ach);
+ }
SHOW_PARSE_END
}
if( new_var->level == 1 && new_var->occurs.bounds.upper )
{
- if( new_var->data.memsize < new_var->data.capacity * new_var->occurs.bounds.upper )
+ if( new_var->data.memsize < new_var->data.capacity() * new_var->occurs.bounds.upper )
{
cbl_internal_error("LEVEL 01 (%s) OCCURS "
"has insufficient data.memsize", new_var->name);
{
gg_free(member(new_var, "data"));
gg_assign(member(new_var, "data"),
- gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity)));
+ gg_cast(UCHAR_P, gg_malloc(new_var->data.capacity())));
gg_assign(member(new_var, "allocated"),
- build_int_cst_type(SIZE_T, new_var->data.capacity));
+ build_int_cst_type(SIZE_T, new_var->data.capacity()));
}
ELSE
{
}
size_t length_of_initial_string = 0;
- char *new_initial = NULL;
+ const char *new_initial = NULL;
// Make sure we have a new variable to work with.
if( !new_var )
TRACE1_END
}
- if( is_table(new_var) && new_var->data.capacity == 0)
+ if( is_table(new_var) && new_var->data.capacity() == 0)
{
cbl_internal_error(
"%s: %d %s is a table, but it improperly has a capacity of zero",
"%<var_decl_node%>", __func__, new_var->name);
}
- switch( new_var->type )
+ switch( new_var->type ) // Trap_here for ordinary variables.
{
static int counter=1;
char ach[2*sizeof(cbl_name_t)];
if( new_var->type == FldNumericBinary
|| new_var->type == FldNumericBin5 )
{
- switch( new_var->data.capacity )
+ switch( new_var->data.capacity() )
{
case 1:
case 2:
"%s is type %s and has capacity %u\n",
new_var->name,
cbl_field_type_str(new_var->type),
- new_var->data.capacity);
+ new_var->data.capacity());
gcc_unreachable();
break;
}
size_t level_88_string_size = 0;
char *level_88_string = NULL;
+ char *class_string = NULL;
if( ancestor )
{
- level_88_string = get_level_88_domain(ancestor->data.capacity, new_var, level_88_string_size);
+ level_88_string = get_level_88_domain(ancestor->data.capacity(),
+ new_var,
+ level_88_string_size);
+ if( level_88_string )
+ {
+ // At this point, the string is in source_code encoding, no matter what
+ // the variable's encoding might be. In the run-time, we will be doing
+ // any comparisons of text strings using UTF32 (because that's how we
+ // handle somebody specifying a UTF-8 exec-charset.) Rather than
+ // convert this string at run-time, we convert it here:
+ size_t converted_length;
+ const char *converted = __gg__iconverter(
+ new_var->codeset.default_encodings.source->type,
+ DEFAULT_32_ENCODING,
+ level_88_string,
+ level_88_string_size, // Convert the NUL
+ &converted_length);
+ level_88_string_size = converted_length;
+ level_88_string = static_cast<char *>(xrealloc(level_88_string,
+ level_88_string_size));
+ memcpy(level_88_string, converted, level_88_string_size);
+ // level_88_string is now a UTF32 string with a terminating four-byte
+ // NUL.
+ }
}
if( !new_var->data.picture )
if( new_var->type == FldClass && new_var->level != 88 )
{
- new_var->data.initial = get_class_condition_string(new_var);
+ class_string = get_class_condition_string(new_var);
+ length_of_initial_string = strlen(class_string)+1;
+ new_initial = class_string;
}
-
- if( new_var->type == FldLiteralA )
+ else if( new_var->type == FldLiteralA )
{
- length_of_initial_string = new_var->data.capacity;
+ length_of_initial_string = new_var->data.capacity();
}
- else if( new_var->data.initial && new_var->data.initial[0] != '\0' )
+ else if( new_var->data.original() && new_var->data.original()[0] != '\0' )
{
if( new_var->type == FldClass )
{
- length_of_initial_string = strlen(new_var->data.initial)+1;
+ length_of_initial_string = strlen(new_var->data.original())+1;
}
else if( new_var->type == FldNumericDisplay )
{
- length_of_initial_string = strlen(new_var->data.initial)+1;
+ length_of_initial_string = strlen(new_var->data.original())+1;
}
else
{
- // This is an ordinary string
- // fprintf(stderr, ">>>>>>> parser_symbol_add %s %s \n", cbl_field_type_str(new_var->type), new_var->name);
- // fprintf(stderr, " %d %d\n", (int)strlen(new_var->data.initial), (int)new_var->data.capacity);
- //length_of_initial_string = strlen(new_var->data.initial) + 1;
- length_of_initial_string = new_var->data.capacity + 1;
+ length_of_initial_string = new_var->data.capacity() + 1;
}
}
else
&& new_var->type != FldLiteralN
&& !(new_var->attr & intermediate_e))
{
- // During the early stages of implementing cbl_field_t::our_index, there
- // were execution paths in parse.y and parser.cc that resulted in
- // our_index not being set. Those should be gone.
- fprintf(stderr, "our_index is NULL under unanticipated circumstances");
- gcc_assert(false);
+ if( ! (new_var->type == FldFloat && new_var->has_attr(constant_e)) )
+ {
+ // N.B. If level is 0 then we're not participating in a hierarchy.
+ // During the early stages of implementing cbl_field_t::our_index, there
+ // were execution paths in parse.y and parser.cc that resulted in
+ // our_index not being set. Those should be gone.
+ cbl_errx("%<our_index%> is NULL under unanticipated circumstances");
+ }
}
// When we create the cblc_field_t structure, we need a data pointer
* As of Tue Apr 4 10:29:35 2023, we support 01 CONSTANT numeric values as follows:
* 1. FldNumericBin5
* 2. always constant_e, also potentially global_e
- * 3. compile-time value in cbl_field_data_t::value
+ * 3. compile-time value in cbl_field_data_t::valuer
* 4. cbl_field_data_t::capacity is 0 because it requires no working storage
*/
- if( new_var->data.capacity == 0
+ if( new_var->data.capacity() == 0
&& new_var->level != 88
&& new_var->type != FldClass
&& new_var->type != FldLiteralN
if( level_88_string )
{
- new_var->data.initial = level_88_string;
+ new_var->data.original(level_88_string);
+ new_initial = level_88_string;
length_of_initial_string = level_88_string_size;
}
{
char achDataName[256];
sprintf(achDataName, "__%s_vardata", external_record_base);
- tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity);
+ tree array_type = build_array_type_nelts(UCHAR, new_var->data.capacity());
new_var->data_decl_node = gg_define_variable(
array_type,
achDataName,
{
// Create a static array of UCHAR, and make that the data_decl_node
// size_t bytes_to_allocate = new_var->data.memsize ?
- // new_var->data.memsize : new_var->data.capacity;
+ // new_var->data.memsize : new_var->data.capacity();
size_t bytes_to_allocate = std::max(new_var->data.memsize,
- new_var->data.capacity);
+ new_var->data.capacity());
// A FldClass actually doesn't need any bytes, because the only important
// thing about it is the .initial field. We will allocate a single byte,
}
}
- if( new_var->data.initial )
+ // At this point, new_initial might have been set by
+ // get_class_condition_string. If not, we set it another way:
+ if( !level_88_string && !class_string)
{
- new_initial = initial_from_initial(new_var);
- }
- if( new_initial )
- {
- switch(new_var->type)
- {
- case FldGroup:
- case FldAlphanumeric:
- case FldLiteralA:
- length_of_initial_string = new_var->data.capacity+1;
- break;
-
- case FldLiteralN:
- length_of_initial_string = strlen(new_initial)+1;
- break;
-
- default:
- length_of_initial_string = new_var->data.capacity;
- break;
- }
- }
- else
- {
- new_initial = static_cast<char *>(xmalloc(length_of_initial_string));
- gcc_assert(new_initial);
- memcpy(new_initial, new_var->data.initial, length_of_initial_string);
+ new_initial = const_cast<char *>(new_var->data.initial);
+ length_of_initial_string = new_var->data.capacity();
}
actual_allocate:
new_initial,
immediate_parent,
new_var_decl);
- free(new_initial);
-
- if( level_88_string )
- {
- free(level_88_string);
- }
+ free(level_88_string);
+ free(class_string);
if( !(new_var->attr & ( linkage_e | based_e)) )
{
void
parser_exception_file( cbl_field_t *tgt, cbl_file_t* file = NULL );
+void
+parser_intrinsic_convert(cbl_field_t *tgt,
+ const cbl_refer_t& input,
+ convert_type_t src_fmt,
+ unsigned int dst_fmt );
+
+void
+parser_intrinsic_find_string(cbl_field_t *tgt,
+ const cbl_refer_t& haystack,
+ const cbl_refer_t& needle,
+ const cbl_refer_t *after,
+ bool last,
+ bool anycase);
+
void
parser_module_name( cbl_field_t *tgt, module_type_t type );
temp_field.type = remainder->field->type;
temp_field.attr = (remainder->field->attr | intermediate_e) & ~initialized_e;
temp_field.level = 1;
- temp_field.data.memsize = remainder->field->data.memsize ;
- temp_field.data.capacity = remainder->field->data.capacity;
- temp_field.data.digits = remainder->field->data.digits ;
- temp_field.data.rdigits = remainder->field->data.rdigits ;
- temp_field.data.initial = remainder->field->data.initial ;
- temp_field.data.picture = remainder->field->data.picture ;
- temp_field.codeset = remainder->field->codeset ;
+ temp_field.data = remainder->field->data;
+ temp_field.codeset = remainder->field->codeset ;
parser_symbol_add(&temp_field);
temp_remainder.field = &temp_field;
{
// This is an integer type that can be worked with quickly
is_negative |= ( A[i].field->attr & signable_e );
- max_capacity = std::max(max_capacity, A[i].field->data.capacity);
+ max_capacity = std::max(max_capacity, A[i].field->data.capacity());
retval = tree_type_from_size(max_capacity, is_negative);
}
else
// We now either accumulate into C[n] or assign to C[n]:
for(size_t i=0; i<nC; i++ )
{
- tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0);
+ tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity(), 0);
tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"),
refer_offset(C[i].refer));
tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
// We now either accumulate into C[n] or assign to C[n]:
for(size_t i=0; i<nC; i++ )
{
- tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0);
+ tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity(), 0);
tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"),
refer_offset(C[i].refer));
tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
// We now either multiply into C[n] or assign A * B to C[n]:
for(size_t i=0; i<nC; i++ )
{
- tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity, 0);
+ tree dest_type = tree_type_from_size(C[i].refer.field->data.capacity(), 0);
tree dest_addr = gg_add(member(C[i].refer.field->var_decl_node, "data"),
refer_offset(C[i].refer));
tree ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
for(size_t i=0; i<nC; i++ )
{
tree dest_type =
- tree_type_from_size(C[i].refer.field->data.capacity, 0);
+ tree_type_from_size(C[i].refer.field->data.capacity(), 0);
tree dest_addr = gg_add(member( C[i].refer.field->var_decl_node,
"data"),
refer_offset(C[i].refer));
{
dest_addr = gg_add( member(remainder.field->var_decl_node, "data"),
refer_offset(remainder));
- dest_type = tree_type_from_size(remainder.field->data.capacity, 0);
+ dest_type = tree_type_from_size(remainder.field->data.capacity(), 0);
ptr = gg_cast(build_pointer_type(dest_type), dest_addr);
gg_assign(gg_indirect(ptr),
error,
not_error,
compute_error,
- "__gg__fixed_phase2_assign_to_c");
+ "__gg__float_phase2_assign_to_c");
}
arithmetic_error_handler( error,
not_error,
}
else
{
- return build_int_cst_type(SIZE_T, field->data.capacity);
+ return build_int_cst_type(SIZE_T, field->data.capacity());
}
}
const cbl_enabled_exceptions_t&
enabled_exceptions( cdf_enabled_exceptions() );
+ // These calculations are based on position within the field, so offset and
+ // length have to be multiplied by the stride of the encoding:
+ const charmap_t *charmap = __gg__get_charmap(refer.field->codeset.encoding);
+ tree stride = build_int_cst_type(LONG, charmap->stride());
+
if( !enabled_exceptions.match(ec_bound_ref_mod_e) )
{
// This is normal operation -- no exception checking. Thus, we won't
refer.refmod.from->field,
refer_offset(*refer.refmod.from));
gg_decrement(refstart);
+ gg_assign(refstart, gg_multiply(refstart, stride));
if( refer.refmod.len )
{
get_integer_value(reflen,
refer.refmod.len->field,
refer_offset(*refer.refmod.len));
+ // Modify refer.length by stride:
+ gg_assign(reflen, gg_multiply(reflen, stride));
}
else
{
// The length was not specified, so we need to return the distance
// between refmod.from and the end of the field:
- gg_assign(reflen, gg_subtract( get_any_capacity(refer.field), refstart) );
+ gg_assign(reflen,
+ gg_subtract( get_any_capacity(refer.field),
+ refstart) );
}
return;
}
// Make refstart zero-based:
gg_decrement(refstart);
+ gg_assign(refstart, gg_multiply(refstart, stride));
IF( refstart, lt_op, build_int_cst_type(LONG, 0 ) )
{
gg_assign(refstart, gg_cast(LONG, integer_zero_node));
// Set reflen to one here, because otherwise it won't be established.
gg_assign(reflen, gg_cast(TREE_TYPE(reflen), integer_one_node));
+ gg_assign(reflen, gg_multiply(reflen, stride));
}
ELSE
{
IF( refstart, gt_op, gg_cast(TREE_TYPE(refstart), get_any_capacity(refer.field)) )
{
- // refstart greater than zero is an error condition:
+ // refstart greater than capacity is an error condition:
set_exception_code(ec_bound_ref_mod_e);
gg_assign(refstart, gg_cast(LONG, integer_zero_node));
// Set reflen to one here, because otherwise it won't be established.
gg_assign(reflen, gg_cast(TREE_TYPE(reflen), integer_one_node));
+ gg_assign(reflen, gg_multiply(reflen, stride));
}
ELSE
{
refer.refmod.len->field,
refer_offset(*refer.refmod.len),
CHECK_FOR_FRACTIONAL_DIGITS);
+ gg_assign(reflen, gg_multiply(reflen, stride));
IF( var_decl_rdigits,
ne_op,
integer_zero_node )
// length is not an integer, which is an error condition
set_exception_code(ec_bound_ref_mod_e);
gg_assign(reflen, gg_cast(LONG, integer_one_node));
+ gg_assign(reflen, gg_multiply(reflen, stride));
gg_assign(var_decl_rdigits, integer_zero_node);
}
ELSE
// length is too small, which is an error condition.
set_exception_code(ec_bound_ref_mod_e);
gg_assign(reflen, gg_cast(LONG, integer_one_node));
+ gg_assign(reflen, gg_multiply(reflen, stride));
}
ELSE
{
// as the TODO item.
gg_assign(refstart, gg_cast(LONG, integer_zero_node));
gg_assign(reflen, gg_cast(LONG, integer_one_node));
+ gg_assign(reflen, gg_multiply(reflen, stride));
}
ELSE
{
ENDIF
}
ENDIF
+ // Modify the length calculation for stride:
+ //gg_assign(reflen, gg_multiply(reflen, stride));
}
else
{
// We need to start with the rightmost subscript, and work our way up through
// our parents. As we find each parent with an OCCURS, we increment qual_data
- // by (subscript-1)*An->data.capacity
+ // by (subscript-1)*An->data.capacity()
// Establish the field_t pointer for walking up through our ancestors:
cbl_field_t *parent = refer.field;
case FldNumericDisplay:
{
Analyzer.Message("FldNumericDisplay");
+ const charmap_t *charmap = __gg__get_charmap(field->codeset.encoding);
+ int stride = charmap->stride();
+
// Establish the source
tree source_address = get_data_address(field, field_offset);
// The final byte is '+' or '-'
gg_assign(signp,
gg_add(source_address,
- build_int_cst_type( SIZE_T,
- field->data.digits)));
+ build_int_cst_type(SIZE_T,
+ field->data.digits*stride)));
}
}
else
gg_assign(signp,
gg_add(source_address,
build_int_cst_type( SIZE_T,
- field->data.digits-1)));
+ (field->data.digits-1)*stride)));
}
}
}
tree source = get_data_address(field, field_offset);
size_t dest_nbytes = gg_sizeof(value);
- size_t source_nbytes = field->data.capacity;
+ size_t source_nbytes = field->data.capacity();
if( debugging )
{
}
tree source_address = get_data_address(field, field_offset);
tree dest_type = TREE_TYPE(value);
- tree source_type = tree_type_from_size( field->data.capacity,
+ tree source_type = tree_type_from_size( field->data.capacity(),
field->attr & signable_e);
if( debugging && rdigits)
{
get_data_address( field,
field_offset),
build_int_cst_type(INT,
- field->data.capacity),
+ field->data.capacity()),
NULL_TREE)));
break;
}
tree_type_from_field(const cbl_field_t *field)
{
gcc_assert(field);
- return tree_type_from_size(field->data.capacity, field->attr & signable_e);
+ return tree_type_from_size(field->data.capacity(), field->attr & signable_e);
}
tree
}
scale_by_power_of_ten_N(value, dest->data.rdigits - rhs_rdigits);
- tree dest_type = tree_type_from_size( dest->data.capacity,
+ tree dest_type = tree_type_from_size( dest->data.capacity(),
dest->attr & signable_e);
tree dest_pointer = gg_add(member(dest->var_decl_node, "data"),
dest_offset);
get_literal_string(cbl_field_t *field)
{
assert(field->type == FldLiteralA);
- size_t buffer_length = field->data.capacity+1;
+ size_t buffer_length = field->data.capacity()+1;
char *buffer = static_cast<char *>(xcalloc(1, buffer_length));
size_t charsout;
const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
field->codeset.encoding,
- field->data.initial,
- field->data.capacity,
+ field->data.original(),
+ field->data.capacity(),
&charsout);
- memcpy(buffer, converted, field->data.capacity+1);
+ memcpy(buffer, converted, field->data.capacity()+1);
return buffer;
}
bool
refer_is_clean(const cbl_refer_t &refer)
{
- if( !refer.field || refer.field->type == FldLiteralN )
+ if( !refer.field || refer.field->is_numeric_constant() )
{
// It is routine for a refer to have no field. It happens when the parser
// passes us a refer for an optional parameter that has been omitted, for
// like.
return true;
}
-
+
return !refer.all
&& !refer.addr_of
&& !refer.nsubscript()
// We multiply the ODO value by the size of the data capacity to get the
// shortened length:
- tree mult_expr = gg_multiply( build_int_cst_type(TREE_TYPE(value64), odo->data.capacity),
+ tree mult_expr = gg_multiply( build_int_cst_type(TREE_TYPE(value64),
+ odo->data.capacity()),
value64 );
// And we add that to the distance from the requested variable to the odo
*/
- // This test has to be here, otherwise there are failures in regression
- // testing.
if( !refer.field )
{
return size_t_zero_node;
"%{D*} %{E} %{I*} %{M} %{fmax-errors*} %{fsyntax-only*} "
"%{fcobol-exceptions*} "
"%{copyext} "
+ "%{fexec-charset*} "
+ "%{fexec-national-charset*} "
"%{fstatic-call} %{fdefaultbyte} "
"%{ffixed-form} %{ffree-form} %{indicator-column*} "
"%{preprocess} "
"%{dialect} "
"%{include} "
"%{Wno-apply-commit} "
+ "%{Wno-any-length} "
"%{Wno-file-code-set} "
"%{Wno-high-order-bit} "
"%{Wno-bad-line-directive} "
"%{Wno-bad-numeric} "
"%{Wno-binary-long-long} "
"%{Wno-call-giving} "
+ "%{Wno-call-literal} "
"%{Wno-cdf-dollar} "
"%{Wno-cdf-invalid-parameter} "
"%{Wno-cdf-name-not-found} "
"%{Wno-literal-concat} "
"%{Wno-locale-error} "
"%{Wno-move-corresponding} "
+ "%{Wno-move-index} "
"%{Wno-move-pointer} "
"%{Wno-nllanginfo-error} "
"%{Wno-operator-space} "
Cobol Joined Separate Var(cobol_copyext) Init(0)
Define alternative implicit copybook filename extension.
+fexec-charset=
+Cobol Joined Var(cobol_charset) RejectNegative
+; Documented in c.opt
+
+fexec-national-charset=
+Cobol Joined Var(cobol_national_charset) RejectNegative
+Set the default execution character set for NATIONAL data items
+
;; warnings
; Par78CdfDefinedW
Cobol Warning Var(call_giving, 1) Init(1)
Warn if CALL ... GIVING is used.
+; MfCallLiteral
+Wcall-literal
+Cobol Warning Var(call_literal, 1) Init(1)
+Warn if CALL is used is used with a literal parameter by reference.
+
; MfCdfDollar
Wcdf-dollar
Cobol Warning Var(cdf_dollar, 1) Init(1)
Cobol Warning Var(level_78, 1) Init(1)
Warn if Level 78 is used.
+; MfAnyLengthContained
+Wany-length
+Cobol Warning Var(cobol_any_length, 1) Init(1)
+Warn if ANY LENGTH is used in outermost program.
+
+; MfMoveIndex
+Wmove-index
+Cobol Warning Var(move_index, 1) Init(1)
+Warn if MOVE INDEX is used
+
; MfMovePointer
Wmove-pointer
Cobol Warning Var(move_pointer, 1) Init(1)
Cobol RejectNegative Joined Separate UInteger Var(indicator_column) Init(0) IntegerRange(0, 8)
-findicator-column=<n> Column after which Region A begins.
-finternal-ebcdic
-Cobol Var(cobol_ebcdic, 1) Init(0)
--finternal-ebcdic Internal processing is in EBCDIC Code Page 1140.
-
fstatic-call
Cobol Var(cobol_static_call, 1) Init(1)
Enable/disable static linkage for CALL literals.
if( yy_flex_debug ) {
size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p);
size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend);
- dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
+ dbgmsg("%s:%d: %s:" HOST_SIZE_T_PRINT_UNSIGNED
": COPY directive is " HOST_SIZE_T_PRINT_UNSIGNED " lines '%.*s'",
- __func__, __LINE__,
+ __func__, __LINE__, cobol_filename(),
(fmt_size_t)nnl, (fmt_size_t)nst, copy_stmt.size(), copy_stmt.p);
}
}
{ MfBinaryLongLong, "-Wbinary-long-long", diagnostics::kind::error, dialect_mf_gnu },
{ MfCallGiving, "-Wcall-giving", diagnostics::kind::error, dialect_mf_gnu },
+ { MfCallLiteral, "-Wcall-literal", diagnostics::kind::error, dialect_mf_e },
{ MfCdfDollar, "-Wcdf-dollar", diagnostics::kind::error, dialect_mf_gnu },
{ MfComp6, "-Wcomp-6", diagnostics::kind::error, dialect_mf_gnu },
{ MfCompX, "-Wcomp-x", diagnostics::kind::error, dialect_mf_gnu },
- { MfLevel_1_Occurs, "Wlevel-1-occurs", diagnostics::kind::error, dialect_mf_gnu },
+ { MfLevel_1_Occurs, "-Wlevel-1-occurs", diagnostics::kind::error, dialect_mf_gnu },
{ MfLevel78, "-Wlevel-78", diagnostics::kind::error, dialect_mf_gnu },
+ { MfAnyLength, "-Wany-length", diagnostics::kind::error, dialect_mf_gnu },
+ { MfMoveIndex, "-Wmove-index", diagnostics::kind::error, dialect_gnu_e },
{ MfMovePointer, "-Wmove-pointer", diagnostics::kind::error, dialect_mf_gnu },
{ MfReturningNum, "-Wreturning-number", diagnostics::kind::error, dialect_mf_gnu },
{ MfUsageTypename, "-Wusage-typename", diagnostics::kind::error, dialect_mf_gnu },
return false;
}
-bool
-cbl_diagnostic_kind( cbl_dialect_t dialect, diagnostics::kind kind ) {
- bool ok = true;
- for( auto diag : cbl_diagnostics ) {
- if( diag.dialect == dialect ) {
- if( ! cbl_diagnostic_kind(diag.id, kind) ) ok = false;
- }
- }
- return ok;
-}
-
void
cobol_warning( cbl_diag_id_t id, int yn, bool warning_as_error ) {
gcc_assert( 0 <= yn && yn <= 1 );
cbl_diagnostic_kind(id, kind);
}
+/*
+ * Set diagnostics associated with a dialog to be ignored, because the
+ * constructs are valid for that dialog. We cannot use cbl_diagnostic_kind()
+ * for this purpose because it modified the std::set that we're iterating over.
+ */
+void
+cobol_warning_suppress( cbl_dialect_t dialect ) {
+ std::set<cbl_diag_t> modified;
+
+ for( auto diag : cbl_diagnostics ) {
+ if( diag.dialect & dialect ) {
+ switch(diag.id) {
+ case IbmSectionNegE:
+ case IbmSectionRangeE:
+ case IbmSectionSegmentW:
+ break; // do not suppress
+ default:
+ diag.kind = diagnostics::kind::ignored;
+ break;
+ }
+ }
+ modified.insert(diag);
+ }
+ cbl_diagnostics.clear();
+ cbl_diagnostics.insert(modified.begin(), modified.end());
+}
+
static inline const char *
option_of( cbl_diag_id_t id ) {
auto diag = cbl_diagnostics.find(cbl_diag_t(id));
}
return true;
}
+ cbl_encoding_t encode_as() const {
+ switch(prefix[0]) {
+ case '\0':
+ case 'X':
+ case 'Z':
+ return current_encoding('A');
+ case 'N':
+ return current_encoding('N');
+ default:
+ dbgmsg("no such prefix '%s'", prefix);
+ if( prefix[0] != ftoupper(prefix[0]) ) {
+ gcc_unreachable();
+ }
+ break;
+ }
+ gcc_unreachable();
+ }
};
struct acrc_t { // Abbreviated combined relation condition
struct cbl_field_t;
static inline cbl_field_t *
- new_literal( const char initial[], enum radix_t radix );
+ new_literal( const cbl_loc_t loc, const char initial[], enum radix_t radix );
#pragma GCC diagnostic pop
enum select_clause_t {
%type <field_data> value78
%type <field> literal name nume typename
-%type <field> num_literal signed_literal
+%type <field> num_constant num_literal signed_literal
%type <number> perform_start
%type <refer> perform_times
%type <refer> advancing advance_by
%type <refer> alphaval alpha_val numeref scalar scalar88
%type <refer> tableref tableish
-%type <refer> varg varg1 varg1a
+%type <refer> varg varg1 varg1a start_after start_pos
%type <refer> expr expr_term compute_expr free_tgt by_value_arg
%type <refer> move_tgt selected_name read_key read_into vary_by
%type <refer> accept_refer num_operand envar search_expr any_arg
%type <field> intrinsic0
%type <number> intrinsic_v intrinsic_I intrinsic_N intrinsic_X
%type <number> intrinsic_I2 intrinsic_N2 intrinsic_X2
-%type <number> lopper_case
+%type <number> lopper_case
%type <number> return_body return_file
%type <field> trim_trailing function_udf
%type <boolean> io_invalid read_eof write_eop
global is_global anycase backward
end_display
- exh_changed exh_named
+ exh_changed exh_named last
override
%type <number> mistake globally first_last
-%type <io_mode> io_mode
+%type <io_mode> io_mode
%type <label_pair> xmlprocs
%type <error> xmlexcept xmlexcepts
%type <field> xmlencoding xmlvalidating
+%type <field> xmlgen_count
%type <number> xmlreturning
%type <label> xmlparse_body
+%type <xml_decl_attr> xmlgen_decl
%type <labels> labels
%type <label> label_1 section_name
%type <replacement> init_by
%type <replacements> init_bys init_replace
%type <refer> init_data exit_with stop_status
-%type <float128> cce_expr cce_factor const_value
+%type <cce_type> cce_expr cce_factor const_value
%type <prog_end> end_program1
%type <substitution> subst_input
%type <substitutions> subst_inputs
%type <namelocs> repo_func_names
%type <codeset> codeset_name
%type <locale_phrase> locale_phrase
+%type <number> convert_hex convert_nat convert_alpha // convert_fmt
%union {
bool boolean;
int number;
char *string;
- REAL_VALUE_TYPE float128;
+ struct { REAL_VALUE_TYPE r; char *s; } cce_type;
literal_t literal;
cbl_field_attr_t field_attr;
ec_type_t ec_type;
cbl_namelocs_t *namelocs;
declarative_list_t* dcl_list_t;
isym_list_t* isym_list;
- struct { radix_t radix; char *string; } numstr;
+ struct { bool is_float; radix_t radix; char *string; } numstr;
struct { YYLTYPE loc; int token; literal_t name; } prog_end;
struct { int token; special_name_t id; } special_type;
struct { char locale_type; const char * name; } locale_phrase;
struct { enum select_clause_t clause; cbl_file_t *file; } select_clause;
struct { size_t clauses; cbl_file_t *file; } select_clauses;
struct { YYLTYPE loc; char *on, *off; } switches;
- struct cbl_domain_t *false_domain;
+ struct { cbl_encoding_t encoding; cbl_domain_t *domain; } false_domain;
struct { size_t also; unsigned char *low, *high; } colseq;
struct { cbl_field_attr_t attr; int nbyte; } pic_part;
struct { bool is_locale; cbl_refer_t *arg2; } numval_locale_t;
locale_tgt_t *token_list;
+ struct xml_decl_attr_t { bool with_decl, with_attr; } xml_decl_attr;
+
cbl_options_t::arith_t opt_arith;
cbl_round_t opt_round;
cbl_section_type_t opt_init_sect;
module_type_t module_type;
}
+%printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret
%printer { fprintf(yyo, "clauses: 0x%04x", $$); } data_clauses
-%printer { fprintf(yyo, "%s %s %s",
+
+%printer { fprintf(yyo, "%s{%u/%u} %s '%s' (%s)",
refer_type_str($$),
+ $$ && $$->field? $$->field->char_capacity() : 0,
+ $$ && $$->field? $$->field->data.capacity() : 0,
$$? $$->name() : "<none>",
- $$ && $$->field? $$->field->codeset.name() : "<none>"); } <refer>
-%printer { fprintf(yyo, "%s", $$->field? name_of($$->field) : "[omitted]"); } alloc_ret
-%printer { fprintf(yyo, "%s %s '%s' (%s)",
+ $$ && $$->field? $$->field->data.original()?
+ $$->field->data.original() : "<nil>" : "",
+ $$ && $$->field? $$->field->value_str() : "" ); } <refer>
+
+%printer { fprintf(yyo, "%s{%u/%u} %s '%s' (%s)",
$$? cbl_field_type_str($$->type) : "<%empty>",
+ $$? $$->char_capacity() : 0,
+ $$? $$->data.capacity() : 0,
$$? name_of($$) : "",
- $$? $$->data.initial? $$->data.initial : "<nil>" : "",
+ $$? $$->data.original()?
+ $$->data.original() : "<nil>" : "",
$$? $$->value_str() : "" ); } <field>
%printer { fprintf(yyo, "%c %s",
%printer { fprintf(yyo, "%s'%.*s'{" HOST_SIZE_T_PRINT_UNSIGNED "} %s",
$$.prefix, int($$.len), $$.data, (fmt_size_t)$$.len,
$$.symbol_name()); } <literal>
-%printer { fprintf(yyo,"%s (1st of" HOST_SIZE_T_PRINT_UNSIGNED")",
+%printer { fprintf(yyo,"%s (1st of " HOST_SIZE_T_PRINT_UNSIGNED")",
$$->targets.empty()? "" : $$->targets.front().refer.field->name,
(fmt_size_t)$$->targets.size() ); } <targets>
%printer { fprintf(yyo, "#" HOST_SIZE_T_PRINT_UNSIGNED ": %s",
teed_up_names().front(), (fmt_size_t) teed_up_names().size() ); } qname
%printer { fprintf(yyo, "{%d}", $$ ); } <number>
%printer { fprintf(yyo, "'%s'", $$.string ); } <numstr>
-%printer { const char *s = string_of($$);
- fprintf(yyo, "{%s}", s? s : "??" ); } <float128>
+%printer { const char *s = string_of($$.r);
+ fprintf(yyo, "{%s}", s? s : "??" ); } <cce_type>
%printer { fprintf(yyo, "{%s %c%u}", cbl_field_type_str($$.type),
$$.signable? '+' : ' ',
$$.capacity ); } <computational>
return xstrdup(output);
}
- static inline char * string_of( tree cce ) {
- return string_of (TREE_REAL_CST (cce));
+ static inline const char * string_of( tree cce ) {
+ tree_node *node = TREE_TYPE(cce);
+ if( INTEGRAL_TYPE_P(node) ) {
+ return "integer";
+ }
+ return string_of (TREE_REAL_CST (cce));
}
cbl_field_t *
- new_literal( const literal_t& lit, enum cbl_field_attr_t attr );
+ new_literal( const cbl_loc_t loc, const literal_t& lit, enum cbl_field_attr_t attr );
static YYLTYPE first_line_of( YYLTYPE loc );
%}
if( $1.len != 1 ) {
error_msg(@1, "1-byte hexadecimal literal required");
}
- char ach[16];
- sprintf(ach, "%d", (int)($1.data[0]));
+ char ach[16];
+ sprintf(ach, "%d", (int)($1.data[0]));
//auto f = new_literal($1.data);
- auto f = new_literal(ach);
+ auto f = new_literal(@1, ach);
f = field_add(@1, f);
$$ = field_index(f);
}
}
| LITERAL {
if( $$.prefix[0] != '\0' ) {
- error_msg(@1, "literal cannot use %s prefix in this context",
- $$.prefix);
- YYERROR;
+ if( $$.prefix[0] != 'N' ) {
+ error_msg(@1, "literal cannot use %s prefix in this context",
+ $$.prefix);
+ YYERROR;
+ }
}
if( !is_cobol_charset($$.data) ) {
error_msg(@1, "literal '%s' must be a COBOL or C identifier",
struct cbl_field_t field = { FldClass, 0, {}, 0, $NAME };
if( !namcpy(@NAME, field.name, $2) ) YYERROR;
+ assert( ! domains.empty() );
+ auto encoding = domains.front().encoding;
+
struct cbl_domain_t *domain =
new cbl_domain_t[ domains.size() + 1 ] ;
-
+ auto p = std::find_if( domains.begin(), domains.end(),
+ [enc = encoding]
+ ( const auto& dom ) {
+ return ! dom.encoding_ok(enc);
+ } );
+ if( p != domains.end() ) {
+ error_msg( @domains, "%qs has encoding %qs "
+ "but value %qs has encoding %qs",
+ $NAME,
+ current_t::cbl_encoding_str(domains.front().encoding),
+ p->first.name(),
+ current_t::cbl_encoding_str(p->encoding) );
+ }
std::copy(domains.begin(), domains.end(), domain);
+ domains.clear();
- field.data.false_value_as($domains);
+ field.data.false_value_as($domains.domain);
field.data.domain_as(domain);
- field.codeset.set();
- domains.clear();
+ field.codeset.set(encoding);
if( field_add(@2, &field) == NULL ) {
dbgmsg("failed class");
* "CLASS NAME is domains".
*/
domains: domain
- | domains domain { $$ = $1? $1 : $2; }
+ | domains domain { $$ = $1.domain? $1 : $2; }
;
domain: all LITERAL[a]
{
- $$ = NULL;
- cbl_domain_t domain(@a, $all, $a.len, $a.data);
- domains.push_back(domain);
+ $$.domain = nullptr;
+ cbl_domain_t domain($all, $a.len, $a.data);
+ domains.push_back( domain_t($a.encode_as(), domain) );
}
| all[a_all] LITERAL[a] THRU all[z_all] LITERAL[z]
{
- $$ = NULL;
- cbl_domain_elem_t first(@a, $a_all, $a.len, $a.data),
- last(@z, $z_all, $z.len, $z.data);
- domains.push_back(cbl_domain_t(first, last));
+ $$.domain = nullptr;
+ cbl_domain_elem_t first($a_all, $a.len, $a.data),
+ last($z_all, $z.len, $z.data);
+ if( $a.encode_as() == $z.encode_as() ) {
+ domains.push_back( domain_t($a.encode_as(),
+ cbl_domain_t(first, last)) );
+ } else {
+ error_msg(@z, "encooding of %qs differs from that of %qs",
+ $a.data, $z.data);
+ }
}
| all NUMSTR[n]
{
- $$ = NULL;
- cbl_domain_t dom(@n, $all, strlen($n.string), $n.string, true);
- domains.push_back(dom);
+ $$.domain = nullptr;
+ cbl_domain_t dom($all, strlen($n.string), $n.string, true);
+ domains.push_back( domain_t(dom) );
}
| all[n_all] NUMSTR[n] THRU all[m_all] NUMSTR[m]
{
- $$ = NULL;
- cbl_domain_elem_t first(@n, $n_all, strlen($n.string), $n.string, true),
- last(@m, $m_all, strlen($m.string), $m.string, true);
- domains.push_back(cbl_domain_t(first, last));
+ $$.domain = nullptr;
+ cbl_domain_elem_t first($n_all, strlen($n.string), $n.string, true),
+ last($m_all, strlen($m.string), $m.string, true);
+ domains.push_back( domain_t(cbl_domain_t(first, last)) );
}
| all reserved_value {
- $$ = NULL;
+ $$.domain = nullptr;
if( $2 == NULLS ) YYERROR;
auto value = constant_of(constant_index($2))->data.initial;
- struct cbl_domain_t domain( @2, $all, strlen(value), value );
- domains.push_back(domain);
+ struct cbl_domain_t domain( $all, strlen(value), value );
+ domains.push_back(domain_t(domain));
}
| all[a_all] reserved_value[a] THRU all[z_all] LITERAL[z] {
- $$ = NULL;
+ $$.domain = nullptr;
if( $a == NULLS ) YYERROR;
auto value = constant_of(constant_index($a))->data.initial;
- cbl_domain_elem_t first(@a, $a_all, strlen(value), value),
- last(@z, $z_all, $z.len, $z.data);
- domains.push_back(cbl_domain_t(first, last));
+ cbl_domain_elem_t first($a_all, strlen(value), value),
+ last($z_all, $z.len, $z.data);
+ domains.push_back( domain_t($z.encode_as(),
+ cbl_domain_t(first, last)) );
}
| all[a_all] reserved_value[a] THRU all[z_all] NUMSTR[z] {
- $$ = NULL;
+ $$.domain = nullptr;
if( $a == NULLS ) YYERROR;
auto value = constant_of(constant_index($a))->data.initial;
- cbl_domain_elem_t first(@a, $a_all, strlen(value), value, true),
- last(@z, $z_all, strlen($z.string), $z.string, true);
- domains.push_back(cbl_domain_t(first, last));
+ cbl_domain_elem_t first($a_all, strlen(value), value, true),
+ last($z_all, strlen($z.string), $z.string, true);
+ domains.push_back( domain_t(cbl_domain_t(first, last)) );
}
| when_set_to FALSE_kw is LITERAL[value]
{
const char *dom = $value.data;
- $$ = new cbl_domain_t(@value, false, $value.len, dom);
+ $$.domain = new cbl_domain_t(false, $value.len, dom);
+ $$.encoding = $value.encode_as();
}
| when_set_to FALSE_kw is reserved_value
{
if( $4 == NULLS ) YYERROR;
auto value = constant_of(constant_index($4))->data.initial;
- $$ = new cbl_domain_t(@4, false, strlen(value), value );
+ $$.domain = new cbl_domain_t(false, strlen(value), value );
+ $$.encoding = no_encoding_e;
}
| when_set_to FALSE_kw is NUMSTR[n]
{
- $$ = new cbl_domain_t(@n, false, strlen($n.string), $n.string, true);
+ $$.domain = new cbl_domain_t(false,
+ strlen($n.string), $n.string, true);
+ $$.encoding = current_encoding('A');
}
;
when_set_to: %empty
| WHEN SET TO
;
-data_div: %empty
+data_div: %empty
| DATA_DIV
| DATA_DIV { current_division = data_div_e; } data_sections
{
auto f = cbl_file_of(symbol_at(file_section_fd));
f->varying_size.min = $1.min;
f->varying_size.max = $1.max;
- auto& cap = cbl_field_of(symbol_at(f->default_record))->data.capacity;
- cap = std::max(cap, uint32_t(f->varying_size.max));
+ auto& data = cbl_field_of(symbol_at(f->default_record))->data;
+ data.capacity( std::max(data.capacity(),
+ uint32_t(f->varying_size.max)) );
// If min != max now, we know varying is explicitly defined.
f->varying_size.explicitly = f->varies();
if( f->varying_size.max != 0 ) {
f->attr |= external_e;
cbl_unimplemented("AS LITERAL");
}
+ | is error
+ {
+ error_msg(@1, "invalid FD phrase");
+ }
| fd_linage { cbl_unimplemented("LINAGE"); }
| fd_report {
cbl_unimplemented("REPORT WRITER");
}
field_done();
+#if 0
const auto& field(*$data_descr);
// Format data.initial per picture
}
}
}
+#endif
}
;
;
const_value: cce_expr
- | BYTE_LENGTH of name { set_real_from_capacity(@name, $name, &$$); }
- | LENGTH of name { set_real_from_capacity(@name, $name, &$$); }
- | LENGTH_OF of name { set_real_from_capacity(@name, $name, &$$); }
+ | BYTE_LENGTH of name {
+ $$.s = nullptr;
+ set_real_from_capacity(@name, $name, &$$.r);
+ }
+ | LENGTH of name {
+ $$.s = nullptr;
+ set_real_from_capacity(@name, $name, &$$.r);
+ }
+ | LENGTH_OF of name {
+ $$.s = nullptr;
+ set_real_from_capacity(@name, $name, &$$.r);
+ }
| LENGTH_OF of binary_type[type] {
- real_from_integer(&$$, VOIDmode, $type, SIGNED); }
+ $$.s = nullptr;
+ real_from_integer(&$$.r, VOIDmode, $type, SIGNED);
+ }
;
value78: literalism
{
cbl_field_data_t data;
- data.capacity = capacity_cast(strlen($1.data));
- data.initial = $1.data;
+ data.capacity( capacity_cast(strlen($1.data)) );
+ data.original($1.data);
$$.encoding = $1.encoding;
$$.data = new cbl_field_data_t(data);
}
| const_value
{
cbl_field_data_t data;
- data = build_real (float128_type_node, $1);
- $$.encoding = current_encoding('A');
+ data = build_real (float128_type_node, $1.r);
+ auto s = $1.s ? $1.s : reinterpret_cast<char*>(data.etc.value);
+ data.original(s);
+ $$.encoding = no_encoding_e;
$$.data = new cbl_field_data_t(data);
}
| reserved_value[value]
{
- const auto field = constant_of(constant_index($value));
+ const auto figconst = constant_of(constant_index($value));
$$.encoding = current_encoding('A');
- $$.data = new cbl_field_data_t(field->data);
+ $$.data = new cbl_field_data_t(figconst->data);
}
| true_false
}
}
- | level_name CONSTANT is_global as const_value
+ | level_name CONSTANT is_global as const_value[cce]
{
cbl_field_t& field = *$1;
if( field.level != 1 ) {
field.attr |= constant_e;
if( $is_global ) field.attr |= global_e;
field.type = FldLiteralN;
- field.data = build_real (float128_type_node, $const_value);
- field.data.initial = string_of($const_value);
+ field.data = build_real (float128_type_node, $cce.r);
+ const char *s = $cce.s? $cce.s : string_of($cce.r);
+ field.data.original( s );
field.codeset.set();
+ field.set_initial(@cce);
- if( !cdf_value(field.name, cdfval_t($const_value)) ) {
- error_msg(@1, "%s was defined by CDF", field.name);
+ if( cdf_value(field.name) ) {
+ cbl_message(@1, Par78CdfDefinedW,
+ "%s was defined by CDF", field.name);
}
}
field.type = FldLiteralA;
auto fig = constant_of(constant_index($value));
field.data = fig->data;
+ field.codeset.set();
+ field.set_initial(@value);
}
| level_name CONSTANT is_global as literalism[lit]
field.attr |= constant_e;
if( $is_global ) field.attr |= global_e;
field.type = FldLiteralA;
- field.data.capacity = $lit.len;
- field.data.initial = $lit.data;
field.attr |= literal_attr($lit.prefix);
+
+ if( ! field.codeset.set($lit.encode_as()) ) {
+ error_msg(@lit, "CONSTANT inconsistent with encoding %s",
+ cbl_alphabet_t::encoding_str(field.codeset.encoding));
+ }
+ field.data.original( $lit.data );
+ field.set_initial(@lit);
+
if( field.level != 1 ) {
error_msg(@lit, "%s must be an 01-level data item", field.name);
YYERROR;
}
- if( !cdf_value(field.name, $lit.data) ) {
- error_msg(@1, "%s was defined by CDF", field.name);
- }
- if( ! field.codeset.set() ) {
- error_msg(@lit, "CONSTANT inconsistent with encoding %s",
- cbl_alphabet_t::encoding_str(field.codeset.encoding));
+ if( cdf_value(field.name) ) {
+ cbl_message(@1, Par78CdfDefinedW,
+ "%s was defined by CDF", field.name);
}
-
- value_encoding_check(@lit, $1);
}
| level_name CONSTANT is_global FROM NAME
{
assert($1 == current_field());
+ cbl_field_t& field(*$1);
+ if( cdf_value(field.name) ) {
+ cbl_message(@1, Par78CdfDefinedW,
+ "%s was defined by CDF", field.name);
+ }
const cdfval_t *cdfval = cdf_value($NAME);
if( !cdfval ) {
- error_msg(@1, "%s was defined by CDF", $NAME);
+ error_msg(@NAME, "%s was not defined by CDF", $NAME);
YYERROR;
}
- cbl_field_t& field = *$1;
field.attr |= ($is_global | constant_e);
- field.data.capacity = cdfval->string ? strlen(cdfval->string)
- : sizeof(field.data.value_of());
- field.data.initial = cdfval->string;
- field.data = cdfval->number;
- if( !cdf_value(field.name, *cdfval) ) {
- error_msg(@1, "%s was defined by CDF", field.name);
+ field.codeset.set();
+ // Does a const field want an initial string for a numeric value? --jkl
+ if( cdfval->string ) {
+ field.data.original( cdfval->string );
+ field.set_initial(@NAME);
+ } else {
+ field.data.capacity(sizeof(field.data.value_of()));
+ field.data = cdfval->number;
}
}
-
| LEVEL78 NAME[name] VALUE is value78[data]
{
dialect_ok(@1, MfLevel78, "LEVEL 78");
cbl_field_t field = { FldLiteralA, constant_e, *$data.data,
78, $name, @name.first_line };
- if( field.data.initial ) {
- field.attr |= quoted_e;
- field.codeset.set($data.encoding);
- if( !cdf_value(field.name, field.data.initial) ) {
- cbl_message(Par78CdfDefinedW,
- "%s was defined by CDF", field.name);
- }
- } else {
+ // cce reports no encoded initial value
+ if( $data.encoding == no_encoding_e ) {
field.type = FldLiteralN;
+ field.codeset.set();
field.data.initial = string_of(field.data.value_of());
+ if( cdf_value(field.name) ) {
+ cbl_message(@name, Par78CdfDefinedW,
+ "%s was defined by CDF", field.name);
+ }
+ } else{
+ field.attr |= quoted_e;
field.codeset.set($data.encoding);
- if( !cdf_value(field.name, field.as_integer()) ) {
- cbl_message(Par78CdfDefinedW,
+ field.set_initial(@data);
+ if( cdf_value(field.name) ) {
+ cbl_message(@name, Par78CdfDefinedW,
"%s was defined by CDF", field.name);
}
}
+
if( ($$ = field_add(@name, &field)) == NULL ) {
error_msg(@name, "failed level 78");
- YYERROR;
}
}
auto fig = constant_of(constant_index(NULLS))->data.initial;
struct cbl_domain_t *domain = new cbl_domain_t[2];
- domain[0] = cbl_domain_t(@NAME, false, strlen(fig), fig);
+ domain[0] = cbl_domain_t(false, strlen(fig), fig);
field.data.domain_as(domain);
cbl_domain_t *domain =
new cbl_domain_t[ domains.size() + 1];
+ auto enc = domains.front().encoding;
+ auto p = std::find_if( domains.begin(), domains.end(),
+ [enc]( const auto& dom ) {
+ return !dom.encoding_ok(enc);
+ } );
+ if( p != domains.end() ) {
+ error_msg( @domains, "%qs has encoding %qs "
+ "but value %qs has encoding %qs",
+ $NAME,
+ current_t::cbl_encoding_str(domains.front().encoding),
+ p->first.name(),
+ current_t::cbl_encoding_str(p->encoding) );
+ }
std::copy(domains.begin(), domains.end(), domain);
+ domains.clear();
field.data.domain_as(domain);
- field.data.false_value_as($domains);
+ field.data.false_value_as($domains.domain);
field.codeset.set();
- domains.clear();
if( ($$ = field_add(@2, &field)) == NULL ) {
error_msg(@NAME, "failed level 88");
YYERROR;
}
+ cbl_field_t *parent = parent_of($$);
+ if( parent->codeset.valid() &&
+ enc != parent->codeset.encoding ) {
+ error_msg( @NAME, "%qs has encoding %qs "
+ "but values have encoding %qs",
+ $NAME,
+ current_t::cbl_encoding_str(parent->codeset.encoding),
+ current_t::cbl_encoding_str(enc) );
+ }
}
| name66[alias] RENAMES name[orig]
| level_name[field] data_clauses
{
gcc_assert($field == current_field());
+ //// cbl_field_t& F(*$field);
+
if( $data_clauses == value_clause_e ) { // only VALUE, no PIC
// Error unless VALUE is a figurative constant or (quoted) string.
- if( $field->type != FldPointer &&
- ! $field->has_attr(quoted_e) &&
- normal_value_e == cbl_figconst_of($field->data.initial) )
- {
- error_msg(@field, "%s numeric VALUE %s requires PICTURE",
- $field->name, $field->data.initial);
+ if( $field->type != FldPointer && ! $field->has_attr(quoted_e) ) {
+ switch( cbl_figconst_of($field->data.initial) ) {
+ case normal_value_e:
+ case zero_value_e:
+ break;
+ default:
+ error_msg(@field, "%qs numeric VALUE %qs requires PICTURE",
+ $field->name, $field->data.original());
+ YYERROR; // do not attempt to set capacity
+ }
}
if( null_value_e == cbl_figconst_of($field->data.initial) ) {
// don't change the type
assert(FldPointer != $field->type);
$field->type = FldAlphanumeric;
if( $field->data.initial ) {
- $field->data.capacity = strlen($field->data.initial);
+ $field->set_capacity(strlen($field->data.initial));
}
}
}
// Increase numeric display capacity by 1 for SIGN SEPARATE.
if( $field->type == FldNumericDisplay &&
- is_signable($field) &&
- $field->has_attr(separate_e) ){
- $field->data.capacity++;
+ $field->has_attr(separate_e) ) {
+ gcc_assert(is_signable($field));
+ $field->add_capacity(1);
+ $field->blank_initial($field->char_capacity());
}
// Set Packed-Decimal capacity
if( $field->type == FldPacked ) {
- $field->data.capacity = type_capacity($field->type,
- $field->data.digits);
- if( $field->attr & separate_e )
- {
- // This is a gentle kludge required by the the belated
- // introduction of COMP-6, which is like COMP-3 but with no
- // sign nybble. The code in type_capacity assumes a sign
- // nybble.
- $field->data.capacity = ($field->data.digits+1)/2;
- }
+ // COMP-6 is like COMP-3 but with no sign nybble. The
+ // code in type_capacity assumes a sign nybble.
+ auto one_less = $field->has_attr(separate_e) ? 1 : 0;
+
+ $field->set_capacity(type_capacity($field->type,
+ $field->data.digits - one_less));
}
// Check COMP-5 capacity
// No capacity means no PICTURE, valid only for a (potential) group
- if( $field->type == FldNumericBin5 && $field->data.capacity == 0 ) {
- if( has_clause ($data_clauses, usage_clause_e) &&
- !has_clause ($data_clauses, picture_clause_e) ) {
- // invalidate until a child is born
- $field->type = FldInvalid;
+ if( $field->type == FldNumericBin5 ) {
+ if( $field->data.capacity() == 0 ) {
+ if( has_clause ($data_clauses, usage_clause_e) &&
+ !has_clause ($data_clauses, picture_clause_e) ) {
+ // invalidate until a child is born
+ $field->type = FldInvalid;
+ }
+ } else {
+ if( ($field->data.initial) ) {
+ if( strlen($field->data.initial) < $field->data.capacity() ) {
+ $field->blank_initial( $field->data.capacity() );
+ }
+ }
}
}
// Ensure signed initial VALUE is for signed numeric type
if( is_numeric($field) ) {
- if( $field->data.initial && $field->type != FldFloat ) {
- switch( $field->data.initial[0] ) {
+ auto original = $field->data.original();
+ if( original && $field->type != FldFloat ) {
+ switch( original[0] ) {
case '-':
if( !$field->has_attr(signable_e) ) {
error_msg(@field, "%s is unsigned but has signed VALUE '%s'",
- $field->name, $field->data.initial);
+ $field->name, original);
}
}
}
}
// Verify VALUE
- $field->report_invalid_initial_value(@data_clauses);
-
- bool numerical =
- $field->type == FldNumericDisplay || is_numeric($field);
-
- if( $field->data.initial && ! numerical ) {
- if( normal_value_e == cbl_figconst_of($field->data.initial) ) {
- value_encoding_check(@data_clauses, $field);
- }
- }
+ bool bad_value = $field->report_invalid_initial_value(@data_clauses);
// verify REDEFINES
const auto parent = parent_of($field);
if( parent && $field->level == parent->level ) {
valid_redefine(@field, $field, parent); // calls yyerror
}
+
+ // verify VALUE for Numeric Edited
+ if( $field->type == FldNumericEdited ) {
+ if( !validate_numeric_edited ($field) ) {
+ error_msg(@data_clauses, "%s: PICTURE and VALUE are incompatible",
+ $field->name);
+ }
+ }
+
+ // verify VALUE for numeric
+ if ($data_clauses & value_clause_e) {
+ bool good_value =
+ ($field->data.etc_ok() || $field->data.original()) && ! bad_value;
+ if( good_value ) { // ensure VALUE had a value
+ if( is_numeric($field) || $field->type == FldNumericEdited ) {
+ if( zero_value_e == cbl_figconst_of($field->data.original()) ) {
+ $field->blank_initial($field->char_capacity());
+ }
+ $field->encode_numeric($field->data.original(),
+ data_clause_locations[value_clause_e],
+ $field->data.original_numeric());
+ }
+ }
+ } else { // no VALUE clause
+ if( false && $field->data.initial ) {
+ free(const_cast<char*>($field->data.initial));
+ $field->data.initial = nullptr;
+ }
+ }
+
+ // Any field may become a group, so may have VALUE with no PICTURE
+ const auto stooges3 = (picture_clause_e |
+ value_clause_e |
+ usage_clause_e);
+ if( ($data_clauses & stooges3) == value_clause_e ) { // only
+ $field->type = FldInvalid;
+ auto fig = cbl_figconst_of($field->data.original());
+ if( null_value_e != fig ) {
+ $field->set_initial( $field->data.capacity(),
+ data_clause_locations[value_clause_e]);
+ }
+ }
}
;
YYERROR;
}
+ // We could be more judicious. We could clear the map when
+ // the first clause is encountered, and e.g. set the location
+ // to just the VALUE string, not the whole clause. As of now
+ // the map isn't used, though.
+ data_clause_locations[data_clause_t($2)] = @data_clause;
+
if( $data_clause == redefines_clause_e ) {
error_msg(@2, "REDEFINES must appear "
"immediately after LEVEL and NAME");
}
if( gcobol_feature_embiggen() ) {
- if( field->is_binary_integer() && field->data.capacity == 4) {
+ if( field->is_binary_integer() && field->data.capacity() == 4) {
auto redefined = symbol_redefines(field);
if( redefined && redefined->type == FldPointer ) {
dbgmsg("expanding %s size from %u bytes to %lu "
| value_clause { $$ = value_clause_e;
cbl_field_t *field = current_field();
- if( field->type != FldAlphanumeric &&
- field->data.initial && field->data.initial[0] )
- {
- // Embedded NULs are valid only in FldAlphanumeric, and are
- // already handled.
- if( strlen(field->data.initial) < field->data.capacity ) {
- auto p = blank_pad_initial( field->data.initial,
- strlen(field->data.initial),
- field->data.capacity );
- if( !p ) YYERROR;
- field->data.initial = p;
- }
- }
const cbl_field_t *parent;
+ if( (parent = parent_has_picture(field)) != NULL ) {
+ error_msg(@1, "VALUE invalid because group %s (%s) has PICTURE clause",
+ parent->name, 3 + cbl_field_type_str(parent->type));
+ }
if( (parent = parent_has_value(field)) != NULL ) {
- error_msg(@1, "VALUE invalid because group %s has VALUE clause",
- parent->name);
+ error_msg(@1, "VALUE invalid because group %s (%s) has VALUE clause",
+ parent->name, 3 + cbl_field_type_str(parent->type));
}
}
| volatile_clause { $$ = volatile_clause_e; }
picture_clause: PIC signed nps[fore] nines nps[aft]
{
cbl_field_t *field = current_field();
+ if( ! field->codeset.set() ) {
+ error_msg(@nines, "PICTURE inconsistent with encoding %s",
+ cbl_alphabet_t::encoding_str(field->codeset.encoding));
+ }
if( !field_type_update(field, FldNumericDisplay, @$) ) {
YYERROR;
}
ERROR_IF_CAPACITY(@PIC, field);
- field->attr |= $signed;
- field->data.capacity = type_capacity(field->type, $4);
- field->data.digits = $4;
- if( long(field->data.digits) != $4 ) {
- error_msg(@2, "indicated size would be %d bytes, "
- "maximum data item size is %u",
- $4, UINT32_MAX);
+ // If signable_e is inherited from the group, it is effective
+ // regardless of an 'S' in PICTURE.
+ if( field->has_attr(signable_e) && ! $signed ) {
+ dbgmsg("%s PICTURE must be signed for SIGN IS", field->name);
}
-
+ field->attr |= $signed;
+ field->data.digits = $nines;
+ auto nchar = type_capacity(field->type, $nines);
+ field->set_capacity(nchar);
+ field->blank_initial(nchar);
if( $fore && $aft ) { // leading and trailing P's
error_msg(@2, "PIC cannot have both leading and trailing P");
YYERROR;
}
if( ! field->reasonable_capacity() ) {
error_msg(@2, "%s limited to capacity of %d (would need %u)",
- field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity);
+ field->name, MAX_FIXED_POINT_DIGITS, field->char_capacity());
}
}
| PIC signed NINEV[left] nine[rdigits]
{
cbl_field_t *field = current_field();
+ if( ! field->codeset.set() ) {
+ error_msg(@$, "PICTURE inconsistent with encoding %s",
+ cbl_alphabet_t::encoding_str(field->codeset.encoding));
+ }
field->data.digits = $left + $rdigits;
field->attr |= $signed;
if( field->is_binary_integer() ) {
- field->data.capacity = type_capacity(field->type,
- field->data.digits);
+ field->set_capacity(type_capacity(field->type,
+ field->data.digits));
field->data.rdigits = $rdigits;
} else {
if( !field_type_update(field, FldNumericDisplay, @$) ) {
YYERROR;
}
ERROR_IF_CAPACITY(@PIC, field);
- field->data.capacity = field->data.digits;
+ field->set_capacity(field->data.digits);
field->data.rdigits = $rdigits;
}
+ // data.initial has blanks for character-encoded data
+ // data.capacity may reflect the binary size, if any.
+ field->blank_initial(field->data.digits);
+
if( ! field->reasonable_capacity() ) {
error_msg(@2, "%s limited to capacity of %d (would need %u)",
- field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity);
+ field->name, MAX_FIXED_POINT_DIGITS, field->char_capacity());
}
}
| PIC signed NINEDOT[left] nine[rdigits]
{
+ cbl_field_t *field = current_field();
+ if( ! field->codeset.set() ) {
+ error_msg(@$, "PICTURE inconsistent with encoding %s",
+ cbl_alphabet_t::encoding_str(field->codeset.encoding));
+ }
+
uint32_t size = $left + $rdigits;
- cbl_field_t *field = current_field();
if( !field_type_update(field, FldNumericEdited, @$) ) {
YYERROR;
}
ERROR_IF_CAPACITY(@PIC, field);
field->attr |= $signed;
field->data.digits = size;
- field->data.capacity = ++size;
+ field->set_capacity(++size);
field->data.rdigits = $rdigits;
+ field->blank_initial(size);
if( ! field->reasonable_capacity() ) {
error_msg(@2, "%s limited to capacity of %d (would need %u)",
- field->name, MAX_FIXED_POINT_DIGITS, field->data.capacity);
+ field->name, MAX_FIXED_POINT_DIGITS, field->char_capacity());
}
- }
+ }
- | PIC alphanum_pic[size]
+ | PIC alphanum_pic[nchar]
{
cbl_field_t *field = current_field();
-
+ if( ! field->codeset.valid() ) { // set by the picture
+ dbgmsg("%s:%d: %s has invalid encoding",
+ __FILE__, __LINE__, field->name);
+ }
if( field->type == FldNumericBin5 &&
- field->data.capacity == 0xFF &&
+ field->data.capacity() == 0xFF &&
dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE") )
{ // PIC X COMP-X or COMP-9
if( ! field->has_attr(all_x_e) ) {
YYERROR;
}
}
- assert(0 < $size);
- if( field->data.initial != NULL ) {
- if( 0 < field->data.capacity &&
- field->data.capacity < uint32_t($size) ) {
- auto p = blank_pad_initial(field->data.initial,
- field->data.capacity, $size );
- if( !p ) YYERROR;
- field->data.initial = p;
- }
+ assert(0 < $nchar);
+ field->data.picture = nullptr;
+ auto nchar = std::min($nchar, MAXIMUM_ALPHA_LENGTH);
+ if( nchar < $nchar ) {
+ error_msg(@2, "alphanumeric data-item size (%d) "
+ "exceeds maximum of %d bytes",
+ $nchar, MAXIMUM_ALPHA_LENGTH);
}
-
- charmap_t *charmap =
- __gg__get_charmap(field->codeset.encoding);
- field->data.capacity = $size * charmap->stride();
- field->data.picture = NULL;
-
- if( false ) dbgmsg("PIC alphanum_pic[size]:%d: %s",
- field->line, field_str(field));
+ field->set_initial(nchar, @nchar);
}
| PIC numed[picture]
{
cbl_field_t *field = current_field();
+ if( ! field->codeset.set() ) {
+ error_msg(@picture, "PICTURE inconsistent with encoding %s",
+ cbl_alphabet_t::encoding_str(field->codeset.encoding));
+ }
if( !field_type_update(field, FldNumericEdited, @$) ) {
YYERROR;
}
YYERROR;
}
field->data.picture = $picture;
- field->data.capacity = length_of_picture($picture);
field->data.digits = digits_of_picture($picture, false);
field->data.rdigits = rdigits_of_picture($picture);
if( is_picture_scaled($picture) ) field->attr |= scaled_e;
+ auto nchar = length_of_picture($picture);
+ field->set_capacity(nchar);
+ field->blank_initial(nchar);
}
| PIC ALPHED[picture]
{
- bool is_alpha_edited( const char picture[] );
-
cbl_field_t *field = current_field();
ERROR_IF_CAPACITY(@PIC, field);
- field->data.capacity = length_of_picture($picture);
- field->data.picture = $picture;
-
// In case the lexer guesses wrong.
cbl_field_type_t type = is_numeric_edited($picture)?
FldNumericEdited : FldAlphaEdited;
if( !field_type_update(field, type, @$) ) {
YYERROR;
}
+ field->data.picture = $picture;
+ field->data.capacity(length_of_picture($picture));
switch( type ) {
case FldNumericEdited:
if( is_picture_scaled($picture) ) field->attr |= scaled_e;
break;
case FldAlphaEdited:
- if( !is_alpha_edited(field->data.picture) ) {
+ if( ! field->data.is_alpha_edited() ) {
error_msg(@picture, "invalid picture for Alphanumeric-edited");
YYERROR;
}
default:
gcc_unreachable();
}
+ if( ! field->codeset.set() ) {
+ error_msg(@picture, "PICTURE inconsistent with encoding %s",
+ cbl_alphabet_t::encoding_str(field->codeset.encoding));
+ }
+ field->set_initial(@picture);
}
| PIC ones
;
$$.attr = uniform_picture($picture);
$$.nbyte = strlen($picture);
auto count($count);
+ if( false && count == 0 ) { // zero count has no effect
+ error_msg(@2, "PICTURE count %<(%d)%> is zero", count );
+ YYERROR;
+ }
if( count > 0 ) {
--count;
$$.nbyte += count; // AX9(3) has count 5
}
| '(' NAME ')'
{
+ int64_t output = 1;
auto value = cdf_value($NAME);
- if( ! (value && value->is_numeric()) ) {
- error_msg(@NAME, "PICTURE %qs requires a CONSTANT value", $NAME );
- YYERROR;
- }
- int nmsg = 0;
- auto e = symbol_field(PROGRAM, 0, $NAME);
- if( e ) { // verify not floating point with nonzero fraction
- auto field = cbl_field_of(e);
- assert(is_literal(field));
- REAL_VALUE_TYPE vi;
- real_from_integer (&vi, VOIDmode, field->as_integer(), SIGNED);
- if( !real_identical (TREE_REAL_CST_PTR (field->data.value_of()),
- &vi) ) {
- nmsg++;
- error_msg(@NAME, "invalid PICTURE count %<(%s)%>",
- field->data.initial );
- }
- }
- $$ = value->as_number();
- if( $$ <= 0 && !nmsg) {
- error_msg(@NAME, "invalid PICTURE count %<(%s)%>", $NAME );
- }
+ if( value && ! value->is_numeric() ) {
+ error_msg(@NAME,
+ "PICTURE %qs requires a CONSTANT NUMERIC value",
+ $NAME );
+ output = value->as_number();
+ } else {
+ auto e = symbol_field(PROGRAM, 0, $NAME);
+ if( ! e ) {
+ error_msg(@NAME, "%qs not defined", $NAME );
+ } else { // verify it's integer-like
+ auto field = cbl_field_of(e);
+ if( ! field->has_attr(constant_e) ) {
+ error_msg(@NAME, "count %qs must be CONSTANT", $NAME);
+ }
+ if( ! is_numeric(field) ) {
+ auto s = field->data.original();
+ error_msg(@NAME, "%qs invalid as PICTURE count %<(%s)%>",
+ $NAME, s? s : "" );
+ } else {
+ auto ok = field->data.int64_of();
+ if( ! ok.second ) {
+ error_msg(@NAME, "%qs invalid as PICTURE count %<(%ld)%>",
+ $NAME, long(ok.first) );
+ } else {
+ output = ok.first;
+ }
+ }
+ }
+ }
+ $$ = output;
+ dbgmsg("%s:%d: count: (%s) is %ld", __FILE__, __LINE__, $NAME, long($$));
}
;
}
| usage BINARY_INTEGER [comp] is_signed
{
- // action for BINARY_INTEGER is repeated for COMPUTATIONAL, below.
- // If it changes, consolidate in a function.
- bool infer = true;
- cbl_field_t *field = current_field();
+ bool signable = $is_signed? $comp.signable : false;
- if( ! $is_signed ) {
- $comp.signable = false;
- }
-
- // Some binary types have defined capacity;
- switch($comp.type) {
- // COMPUTATIONAL and COMP-5 rely on PICTURE.
- case FldNumericBinary:
- field->attr |= big_endian_e;
- __attribute__((fallthrough));
- case FldNumericBin5:
- // If no capacity yet, then no picture, infer $comp.capacity.
- // If field has capacity, ensure USAGE is compatible.
- if( field->data.capacity > 0 ) { // PICTURE before USAGE
- infer = false;
- switch( field->type ) {
- case FldAlphanumeric: // PIC X COMP-5 or COMP-X
- assert( field->data.digits == 0 );
- assert( field->data.rdigits == 0 );
- dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
-
- field->type = $comp.type;
- field->clear_attr(signable_e);
- break;
- case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X
- if( $comp.capacity == 0xFF ) { // comp-x is a bit like comp-5
- assert( field->data.digits == field->data.capacity );
- dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
- }
- field->type = $comp.type;
- field->data.capacity = type_capacity(field->type,
- field->data.digits);
- break;
- default: break;
- }
- }
- break;
- case FldPacked: // comp-6 is unsigned comp-3
- assert(! $comp.signable); // else PACKED_DECIMAL from scanner
- field->attr |= separate_e;
- dialect_ok(@2, MfComp6, "COMP-6");
- if( field->type == FldNumericDisplay ) {// PICTURE before USAGE
- infer = false;
- assert(field->data.capacity > 0);
- field->type = $comp.type;
- field->data.capacity = type_capacity(field->type,
- field->data.digits);
- }
- break;
- default:
- break;
- }
-
- if( infer ) {
- if( $comp.capacity > 0 ) {
- if( field->data.capacity > 0 ) {
- error_msg(@comp, "%s is BINARY type, incompatible with PICTURE",
- field->name);
- YYERROR;
- }
- field->data.capacity = $comp.capacity;
- field->type = $comp.type;
- if( $comp.signable ) {
- field->attr = (field->attr | signable_e);
- }
- }
- }
- $$ = $comp.type;
+ $$ = field_binary_usage( @comp, current_field(),
+ $comp.type, $comp.capacity,
+ signable );
}
+
| usage COMPUTATIONAL[comp] native
{
- // logic below duplicates BINARY_INTEGER, above.
- // If it changes, consolidate in a function.
- bool infer = true;
- cbl_field_t *field = current_field();
-
- // Some binary types have defined capacity;
- switch($comp.type) {
- // COMPUTATIONAL and COMP-5 rely on PICTURE.
- case FldNumericBinary:
- field->attr |= big_endian_e;
- __attribute__((fallthrough));
- case FldNumericBin5:
- // If no capacity yet, then no picture, infer $comp.capacity.
- // If field has capacity, ensure USAGE is compatible.
- if( field->data.capacity > 0 ) { // PICTURE before USAGE
- infer = false;
- switch( field->type ) {
- case FldAlphanumeric: // PIC X COMP-5 or COMP-X
- assert( field->data.digits == 0 );
- assert( field->data.rdigits == 0 );
- dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
- field->type = $comp.type;
- field->clear_attr(signable_e);
- break;
- case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X
- if( $comp.capacity == 0xFF ) { // comp-x is a bit like comp-5
- assert( field->data.digits == field->data.capacity );
- dialect_ok(@2, MfCompX, "alphanumeric PICTURE with numeric USAGE");
- }
- field->type = $comp.type;
- field->data.capacity = type_capacity(field->type,
- field->data.digits);
- break;
- default: break;
- }
- }
- break;
- case FldPacked: // comp-6 is unsigned comp-3
- assert(! $comp.signable); // else PACKED_DECIMAL from scanner
- field->attr |= separate_e;
- dialect_ok(@2, MfComp6, "COMP-6");
- if( field->type == FldNumericDisplay ) {// PICTURE before USAGE
- infer = false;
- assert(field->data.capacity > 0);
- field->type = $comp.type;
- field->data.capacity = type_capacity(field->type,
- field->data.digits);
- }
- break;
- default:
- break;
- }
-
- if( infer ) {
- if( $comp.capacity > 0 ) {
- if( field->data.capacity > 0 ) {
- error_msg(@comp, "%s is BINARY type, incompatible with PICTURE",
- field->name);
- YYERROR;
- }
- field->data.capacity = $comp.capacity;
- field->type = $comp.type;
- if( $comp.signable ) {
- field->attr = (field->attr | signable_e);
- }
- }
+ $$ = field_binary_usage( @comp, current_field(),
+ $comp.type, $comp.capacity,
+ $comp.signable );
+ }
+ | usage DISPLAY native {
+ auto field = current_field();
+ if( ! field->codeset.set() ) {
+ error_msg(@2, "USAGE DISPLAY conflicts with PICTURE");
}
- $$ = $comp.type;
+ $$ = FldDisplay;
+ }
+ | usage PACKED_DECIMAL native {
+ cbl_field_t *field = current_field();
+ if( field->data.capacity() > 0 &&
+ field->type != FldNumericDisplay) {
+ error_msg(@2, "USAGE PACKED DECIMAL conflicts with PICTURE");
+ YYERROR;
+ }
+ $$ = FldPacked;
}
- | usage DISPLAY native { $$ = FldDisplay; }
- | usage PACKED_DECIMAL native { $$ = FldPacked; }
| usage PACKED_DECIMAL with NO SIGN
{
cbl_field_t *field = current_field();
- if( field->data.capacity > 0 &&
+ if( field->data.capacity() > 0 &&
field->type != FldNumericDisplay) {
- error_msg(@2, "%s PICTURE is incompatible with USAGE PACKED DECIMAL",
- field->name);
+ error_msg(@2, "USAGE PACKED DECIMAL conflicts with PICTURE");
YYERROR;
}
+ if( field->has_attr(signable_e) ) {
+ error_msg(@$, "signed PICTURE conflicts with NO SIGN");
+ }
+ if( field->has_attr(separate_e) ) {
+ error_msg(@$, "SIGN clause conflicts with NO SIGN");
+ }
field->clear_attr(separate_e);
field->clear_attr(signable_e);
- if( field->type == FldNumericDisplay ) {// PICTURE before USAGE
- assert(field->data.capacity > 0);
- field->data.capacity = type_capacity(FldPacked,
- field->data.digits);
- }
$$ = field->type = FldPacked;
}
| usage INDEX {
}
| usage NATIONAL {
auto field = current_field();
- if( ! field->codeset.set(EBCDIC_e) ) {
- error_msg(@2, "usage NATIONAL conflicts with PICTURE");
+ auto encoding = current_encoding('N');
+ if( ! field->codeset.set(encoding) ) {
+ error_msg(@2, "USAGE NATIONAL conflicts with PICTURE");
}
- $$ = FldInvalid;
+ $$ = FldDisplay;
}
// We should enforce data/code pointers with a different type.
| usage POINTER
}
if( gcobol_feature_embiggen() && redefined &&
is_numeric(redefined->type) && redefined->size() == 4) {
- // For now, we allow POINTER to expand a 32-bit item to 64 bits.
- field->data.capacity = int_size_in_bytes(ptr_type_node);
+ // Allow POINTER to expand a 32-bit item to 64 bits.
+ field->data.capacity(int_size_in_bytes(ptr_type_node));
dbgmsg("%s: expanding #" HOST_SIZE_T_PRINT_UNSIGNED
" %s capacity %u => %u", __func__,
(fmt_size_t)field_index(redefined), redefined->name,
- redefined->data.capacity, field->data.capacity);
+ redefined->data.capacity(), field->data.capacity());
redefined->embiggen();
$lit.prefix, $lit.data);
}
- field->data.initial = $lit.data;
field->attr |= literal_attr($lit.prefix);
field->attr |= quoted_e;
- if( field->data.capacity == 0 ) {
- field->data.capacity = $lit.len;
- } else {
- if( $all ) {
- field_value_all(field);
- } else {
- if( $lit.len < field->data.capacity ) {
- auto p = blank_pad_initial( $lit.data, $lit.len,
- field->data.capacity );
- if( !p ) YYERROR;
- field->data.initial = p;
- }
+ auto capacity = field->data.capacity();
+ field->data.original($lit.data, $all); // oops, sets capacity to strlen
+
+ if( capacity ) { // via PICTURE or USAGE
+ field->data.capacity(capacity); // restore it (unnecessary)
+ if (field->data.initial) { // PICTURE created blank initial
+ field->encode($lit.len, @lit);
}
}
}
- | VALUE all cce_expr[value] {
+ | VALUE all cce_expr[cce] {
+ /*
+ * cce has two parts:
+ * cce.r) Host binary value
+ * cce.s) string value, if supplied as numeric literal
+ */
cbl_field_t *field = current_field();
- auto orig_str = original_number();
- REAL_VALUE_TYPE orig_val;
- real_from_string3 (&orig_val, orig_str,
- TYPE_MODE (float128_type_node));
- char *initial = NULL;
-
- if( real_identical (&orig_val, &$value) ) {
- initial = orig_str;
- pristine_values.insert(initial);
+ if( $cce.s ) {
+ field->data.original($cce.s, $all);
} else {
- initial = string_of($value);
- gcc_assert(initial);
+ field->data.original($cce.r);
}
-
- char decimal = symbol_decimal_point();
- std::replace(initial, initial + strlen(initial), '.', decimal);
-
- field->data.initial = initial;
- field->data = build_real (float128_type_node, $value);
-
- if( $all ) field_value_all(field);
}
| VALUE all reserved_value[value]
{
cbl_field_t *field = current_field();
- if( ! field->codeset.set() ) {
- error_msg(@value, "VALUE inconsistent with encoding %s",
- cbl_alphabet_t::encoding_str(field->codeset.encoding));
+ field->codeset.set();
+ assert( $value != NULLS );
+ auto fig = constant_of(constant_index($value));
+ auto orig_str = fig->data.initial;
+ auto capacity = field->data.capacity();
+ field->data.original(orig_str, $all);
+
+ if( capacity ) { // via PICTURE or USAGE
+ field->data.capacity(capacity); // restore it
+ if (field->data.initial) { // PICTURE created blank initial
+ field->encode( strlen(orig_str), @value );
+ }
}
- if( $value != NULLS ) {
- auto fig = constant_of(constant_index($value));
- cbl_field_t *field = current_field();
- field->data.initial = fig->data.initial;
+ if( $value != ZERO ) {
+ if( ($value != SPACES && field->type == FldNumericEdited)
+ || is_numeric(field) )
+ {
+ error_msg(@value,
+ "unexpected initial value for %s",
+ cbl_field_type_name(field->type));
+ }
}
}
| /* VALUE is */ NULLPTR
{
- auto fig = constant_of(constant_index(NULLS));
- cbl_field_t *field = current_field();
- field->data.initial = fig->data.initial;
+ auto fig = constant_of(constant_index(NULLS));
+ cbl_field_t *field = current_field();
+ auto capacity = field->data.capacity();
+ field->data.original(fig->data.initial);
+ if( capacity ) { // via PICTURE or USAGE
+ field->data.capacity(capacity); // restore it
+ // do not encode
+ }
}
| VALUE error
{
- error_msg(@2, "no valid VALUE supplied");
+ error_msg(@2, "invalid VALUE");
}
;
}
const char *prog_name = current.program()->name;
bool is_compat = 0 < compat_programs.count(prog_name);
- if( ! (field->level == 1 &&
- current_data_section == linkage_datasect_e &&
- (1 < current.program_level() ||
- current.program()->is_function() ||
- is_compat)) ) {
- error_msg(@1, "ANY LENGTH valid only for 01 "
- "in LINKAGE SECTION of a function or contained program");
- YYERROR;
+ if( field->level != 1 || current_data_section != linkage_datasect_e ) {
+ error_msg(@1, "ANY LENGTH valid only for 01 data-item "
+ "in LINKAGE SECTION");
+ }
+ if( ! current.program()->is_function() ) {
+ if( 1 == current.program_level() && ! is_compat) {
+ cbl_message(@$, MfAnyLength,
+ "ANY LENGTH not valid in outermost program");
+ }
}
field->attr |= any_length_e;
}
sign_clause: sign_is sign_leading sign_separate
{
cbl_field_t *field = current_field();
+ if( 0 < field->data.capacity() ) { // PICTURE set size
+ if( ! field->has_attr(signable_e) ) {
+ error_msg(@1, "%qs must be signed for SIGN IS", field->name);
+ }
+ }
+ field->set_attr(signable_e);
if( $sign_leading ) {
- field->attr |= leading_e;
+ field->set_attr(leading_e);
} else {
- field->attr &= ~uint64_t(leading_e); // turn off in case inherited
- field->attr |= signable_e;
+ field->clear_attr(leading_e); // turn off in case inherited
}
- if( $sign_separate ) field->attr |= separate_e;
+ if( $sign_separate ) field->set_attr(separate_e);
}
;
sign_is: %empty
}
;
by_value_arg: scalar
- | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+ | LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
| reserved_value
{
$$ = new_reference(constant_of(constant_index($1)));
envar: scalar { $$ = $1; $$->field->attr |= envar_e; }
| LITERAL {
- $$ = new_reference(new_literal($1, quoted_e));
+ $$ = new_reference(new_literal(@1, $1, quoted_e));
$$->field->attr |= envar_e;
}
;
name_queue.qualify(@1, $1);
auto namelocs( name_queue.pop() );
auto names( name_queue.namelist_of(namelocs) );
- if( ($$ = field_find(names)) == NULL ) {
+ if( ($$ = field_find(@1, names)) == NULL ) {
if( procedure_div_e == current_division ) {
error_msg(namelocs.back().loc,
"DATA-ITEM '%s' not found", names.back() );
rel_term1: all LITERAL
{
$$.invert = false;
- $$.term = new_reference(new_literal($2, quoted_e));
+ $$.term = new_reference(new_literal(@2, $2, quoted_e));
$$.term->all = $all;
}
| all spaces_etc[value]
auto namelocs( name_queue.pop() );
auto names( name_queue.namelist_of(namelocs) );
auto inner = namelocs.back();
- if( ($$ = field_find(names)) == NULL ) {
+ if( ($$ = field_find(@1, names)) == NULL ) {
if( procedure_div_e == current_division ) {
error_msg(inner.loc,
"DATA-ITEM '%s' not found", inner.name );
$$ = s; } // USAGE clause and FLOAT-DECIMAL clause
| BLINK { static char s[] ="BLINK";
$$ = s; } // screen description entry and SET attribute statement
+ | BYTE { static char s[] ="BYTE";
+ $$ = s; } // CONVERT intrinsic function
| BYTE_LENGTH { static char s[] ="BYTE-LENGTH";
$$ = s; } // constant entry
| CAPACITY { static char s[] ="CAPACITY";
{
statement_begin(@1, MOVE);
if( $scalar->field->type == FldIndex ) {
- error_msg(@1, "%qs cannot be MOVEd because it is an %<INDEX%>",
- name_of($scalar->field) );
- YYERROR;
+ cbl_message(@1, MfMoveIndex,
+ "cannot MOVE %qs because it is an %<INDEX%>",
+ name_of($scalar->field) );
}
if( !parser_move2($tgts, *$scalar) ) { YYERROR; }
}
| MOVE all literalism[input] TO move_tgts[tgts]
{
statement_begin(@1, MOVE);
- struct cbl_refer_t *src = new_reference(new_literal($input,
+ struct cbl_refer_t *src = new_reference(new_literal(@input, $input,
quoted_e));
src->all = $all;
if( !parser_move2($tgts, *src) ) { YYERROR; }
} );
if( p != $tgts->targets.end() ) {
- error_msg(@src, "cannot MOVE %s "
- "to numeric receiving field %s",
+ error_msg(@src, "cannot MOVE %qs "
+ "to numeric receiving field %qs",
constant_of(constant_index($src))->name,
field->name );
YYERROR;
}
auto litcon = field.name[0] == '_'? "literal" : "constant";
error_msg(@literal, "%s is a %s", value_str, litcon);
- $$ = NULL;
+ $$ = nullptr;
}
| error
{
error_at = yytext;
error_msg(first_line_of(@1), "invalid receiving operand");
}
- $$ = NULL;
+ $$ = nullptr;
}
;
location_set(@1);
$$ = new cbl_refer_t( new_tempnumeric(none_e) );
dialect_ok(@1, IbmLengthOf, "LENGTH OF");
- parser_set_numeric($$->field, $val->data.capacity);
+ parser_set_numeric($$->field, $val->data.capacity());
}
| LENGTH_OF name[val] subscripts[subs] {
location_set(@1);
cbl_refer_t r1($val);
subscript_dimension_error( @subs, $subs->refers.size(), &r1 );
}
- parser_set_numeric($$->field, $val->data.capacity);
+ parser_set_numeric($$->field, $val->data.capacity());
}
;
cce_expr: cce_factor
| cce_expr '+' cce_expr {
- real_arithmetic (&$$, PLUS_EXPR, &$1, &$3);
- real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+ $$.s = nullptr;
+ real_arithmetic (&$$.r, PLUS_EXPR, &$1.r, &$3.r);
+ real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
}
| cce_expr '-' cce_expr {
- real_arithmetic (&$$, MINUS_EXPR, &$1, &$3);
- real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+ $$.s = nullptr;
+ real_arithmetic (&$$.r, MINUS_EXPR, &$1.r, &$3.r);
+ real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
}
| cce_expr '*' cce_expr {
- real_arithmetic (&$$, MULT_EXPR, &$1, &$3);
- real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+ $$.s = nullptr;
+ real_arithmetic (&$$.r, MULT_EXPR, &$1.r, &$3.r);
+ real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
}
| cce_expr '/' cce_expr {
- real_arithmetic (&$$, RDIV_EXPR, &$1, &$3);
- real_convert (&$$, TYPE_MODE (float128_type_node), &$$);
+ $$.s = nullptr;
+ real_arithmetic (&$$.r, RDIV_EXPR, &$1.r, &$3.r);
+ real_convert (&$$.r, TYPE_MODE (float128_type_node), &$$.r);
}
- | '+' cce_expr %prec NEG { $$ = $2; }
- | '-' cce_expr %prec NEG { $$ = real_value_negate (&$2); }
- | '(' cce_expr ')' { $$ = $2; }
+ | '+' cce_expr %prec NEG { $$.r = $2.r; }
+ | '-' cce_expr %prec NEG { $$.r = real_value_negate (&$2.r); }
+ | '(' cce_expr ')' { $$.r = $2.r; }
;
cce_factor: NUMSTR {
/* real_from_string does not allow arbitrary radix. */
// When DECIMAL IS COMMA, commas act as decimal points.
gcc_assert($1.radix == decimal_e);
- auto p = $1.string, pend = p + strlen(p);
- std::replace(p, pend, ',', '.');
- real_from_string3( &$$, $1.string,
+ $$.s = $1.string;
+ std::string numstr($1.string);
+ std::replace(numstr.begin(), numstr.end(), ',', '.');
+ real_from_string3( &$$.r, numstr.c_str(),
TYPE_MODE (float128_type_node) );
}
;
{
statement_begin(@1, STOP);
dialect_ok(@2, IbmStopNumber, "STOP <number>");
- cbl_refer_t status( new_literal($status.string, $status.radix) );
+ cbl_refer_t status( new_literal(@status, $status.string, $status.radix) );
parser_see_stop_run( status, NULL );
}
| STOP LITERAL[name] // CCVS-85 && IBM syntax
stop_status: status { $$ = NULL; }
| status scalar { $$ = $2; }
| status NUMSTR {
- $$ = new_reference(new_literal($2.string, $2.radix));
+ $$ = new_reference(new_literal(@2, $2.string, $2.radix));
}
;
| arg_list any_arg { $1->push_back($2); $$ = $1; }
;
any_arg: expr
- | LITERAL {$$ = new_reference(new_literal($1, quoted_e)); }
+ | LITERAL {$$ = new_reference(new_literal(@1, $1, quoted_e)); }
;
/*
location_set(@1);
$$ = new_tempnumeric(none_e);
dialect_ok(@1, IbmLengthOf, "LENGTH OF");
- parser_set_numeric($$, $val->data.capacity);
+ parser_set_numeric($$, $val->data.capacity());
}
| LENGTH_OF name[val] subscripts[subs] {
location_set(@1);
cbl_refer_t r1($val);
subscript_dimension_error( @subs, $subs->refers.size(), &r1 );
}
- parser_set_numeric($$, $val->data.capacity);
+ parser_set_numeric($$, $val->data.capacity());
}
;
-num_literal: NUMSTR { $$ = new_literal($1.string, $1.radix); }
+num_constant: NUMSTR {
+ if( $1.is_float ) {
+ $$ = new_literal_float(@1, $1.string);
+ } else {
+ $$ = new_literal(@1, $1.string, $1.radix); }
+ }
+ ;
+num_literal: num_constant
| ZERO { $$ = constant_of(constant_index(ZERO)); }
;
cbl_refer_t r1($val);
subscript_dimension_error( @subs, $subs->refers.size(), &r1 );
}
- parser_set_numeric($$->field, $val->data.capacity);
+ parser_set_numeric($$->field, $val->data.capacity());
}
;
$$ = $1.isymbol()?
cbl_field_of(symbol_at($1.isymbol()))
:
- new_literal($1, quoted_e);
- }
- | NUMSTR
- {
- $$ = new_literal($1.string, $1.radix);
+ new_literal(@1, $1, quoted_e);
}
+ | num_constant
| DATETIME_FMT
{
$$ = new_literal(strlen($1), $1, quoted_e);
auto namelocs( name_queue.pop() );
auto names( name_queue.namelist_of(namelocs) );
auto inner = namelocs.back();
- if( ($$ = field_find(names)) == NULL ) {
+ if( ($$ = field_find(@1, names)) == NULL ) {
error_msg(inner.loc, "no record name '%s'", inner.name);
YYERROR;
}
* number of lines is negative. So, we use the
* negative Number Of The Beast as a PAGE flag.
*/
- $$ = new_reference( new_literal(xstrdup("-666")) );
+ $$ = new_reference( new_constant(xstrdup("-666")) );
}
| device_name { $$ = new_reference(literally_one); }
;
auto literal = $src.isymbol()?
cbl_field_of(symbol_at($src.isymbol()))
:
- new_literal($src, quoted_e);
+ new_literal(@src, $src, quoted_e);
ast_set_pointers($tgts->targets, literal);
}
// Format 12 (save-locale):
}
;
search_expr: expr
- | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+ | LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
;
sort: sort_table
std::vector <cbl_key_t> keys($sort_keys->key_list.size());
if( ! is_table($table->field) ) {
error_msg(@1, "%s has no OCCURS clause", $table->field->name);
+ YYERROR;
}
// 23) If data-name-1 is omitted, the data item referenced by
// data-name-2 is the key data item.
statement_begin(@1, SORT);
if( ! is_table($table->field) ) {
error_msg(@1, "%s has no OCCURS clause", $table->field->name);
+ YYERROR;
}
cbl_key_t
key = cbl_key_t($table->field->occurs.keys[0]),
{
if( $all ) {
$replace_oper->all = true;
- if( is_literal($replace_oper->field) ) {
- if( $replace_oper->field->data.capacity != 1 ) {
+ cbl_field_t *field( $replace_oper->field );
+ if( is_literal(field) ) {
+ if( field->data.capacity() != field->codeset.stride() ) {
error_msg(@all, "ALL %s must be a single character",
- $replace_oper->field->data.initial);
+ field->data.initial);
YYERROR;
}
} else {
*replace = $replace_oper->field;
if( is_literal(match) && is_literal(replace) ) {
if( !$match->all && !$replace_oper->all) {
- if( match->data.capacity != replace->data.capacity ) {
+ if( match->data.capacity() != replace->data.capacity() ) {
// Make a copy of replace, because nice_name returns a static
char *replace_name = xstrdup(nice_name_of(replace));
error_msg(@match, "%qs, size %u NOT EQUAL %qs, size %u",
- nice_name_of(match), match->data.capacity,
- replace_name, replace->data.capacity);
+ nice_name_of(match), match->char_capacity(),
+ replace_name, replace->char_capacity());
free(replace_name);
YYERROR;
}
}
;
-alphaval: LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+alphaval: LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
| reserved_value
{
$$ = new_reference( constant_of(constant_index($1)) );
}
;
init_data: alpha_val
- | NUMSTR {
- $$ = new_reference(new_literal($1.string, $1.radix));
- }
+ | num_constant { $$ = new_reference($1); }
;
call: call_impl end_call
entry: ENTRY LITERAL
{ statement_begin(@1, ENTRY);
- auto name = new_literal($2, quoted_e);
+ auto name = new_literal(@2, $2, quoted_e);
parser_entry( name );
}
| ENTRY LITERAL USING parameters
{ statement_begin(@1, ENTRY);
- auto name = new_literal($2, quoted_e);
+ auto name = new_literal(@2, $2, quoted_e);
ffi_args_t *params = $parameters;
size_t narg = params? params->elems.size() : 0;
cbl_ffi_arg_t *pargs = NULL;
{
// Pretend hex-encoded because that means use verbatim.
auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
- $$ = new_reference(new_literal($1, attr));
+ $$ = new_reference(new_literal(@1, $1, attr));
}
;
{
$$ = new cbl_ffi_arg_t(by_reference_e, $refer);
}
+ | LITERAL
+ {
+ cbl_message(@1, MfCallLiteral,
+ "cannot pass %qs BY REFERENCE", $1.data);
+ cbl_refer_t *r = new_reference(new_literal(@1, $1, quoted_e));
+ $$ = new cbl_ffi_arg_t(by_content_e, r);
+ }
| ADDRESS OF scalar_arg[refer]
{
$$ = new cbl_ffi_arg_t(by_reference_e, $refer, address_of_e);
}
| LITERAL
{
- cbl_refer_t *r = new_reference(new_literal($1, quoted_e));
+ cbl_refer_t *r = new_reference(new_literal(@1, $1, quoted_e));
$$ = new cbl_ffi_arg_t(by_content_e, r);
}
| OMITTED
}
| cce_expr %prec NAME
{
- auto r = new_reference(new_literal(string_of($1)));
+ const char *s = $1.s? $1.s : string_of($1.r);
+ auto r = new_reference(new_literal(@1, s));
$$ = new cbl_ffi_arg_t(by_value_e, r);
}
| ADDRESS OF scalar
;
str_input: scalar
- | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+ | LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
| reserved_value
{
$$ = new_reference(constant_of(constant_index($1)));
;
str_size: SIZE { $$ = new_reference(NULL); }
- | LITERAL { $$ = new_reference(new_literal($1, quoted_e)); }
+ | LITERAL { $$ = new_reference(new_literal(@1, $1, quoted_e)); }
| scalar
| reserved_value
{
| intrinsic_call
| LITERAL
{
- $$ = new_reference(new_literal($1, quoted_e));
+ $$ = new_reference(new_literal(@1, $1, quoted_e));
}
;
// Pretend hex-encoded because that means use verbatim.
auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
auto name = new_literal(strlen(L->name), L->name, attr);
+ symbol_temporary_location(name, @1);
ast_call( @1, name, $$, args.size(), args.data(), NULL, NULL, true );
}
| FUNCTION_UDF_0 {
// Pretend hex-encoded because that means use verbatim.
auto attr = cbl_field_attr_t(quoted_e | hex_encoded_e);
auto name = new_literal(strlen(L->name), L->name, attr);
+ symbol_temporary_location(name, @1);
ast_call( @1, name, $$, narg, args, NULL, NULL, true );
}
;
$$ = new_alphanumeric(1,"CHAR");
if( ! intrinsic_call_1($$, CHAR, $r1, @r1)) YYERROR;
}
-
- | CONVERT '(' varg[r1] convert_src[src] convert_dst[dst] ')' {
+ /* convert formulations:
+ * 1. ANY to ALNUM HEX, or NAT HEX
+ * 2. HEX to BYTE
+ * 3. ALNUM to NAT, ALNUM HEX, or NAT HEX
+ * 4. NAT to ALNUM, ALNUM HEX, or NAT HEX
+ */
+ | CONVERT '(' varg[r1] ANY convert_alpha[dst] convert_hex[hex]')' {
+ location_set(@1);
+ $$ = new_alphanumeric("CONVERT");
+ unsigned int dst = ($dst | $hex );
+ parser_intrinsic_convert($$, *$r1, convert_any_e, dst);
+ }
+ | CONVERT '(' varg[r1] ANY convert_nat[dst] convert_hex[hex]')' {
+ location_set(@1);
+ $$ = new_alphanumeric("CONVERT", current_encoding('N'));
+ unsigned int dst = ($dst | $hex );
+ parser_intrinsic_convert($$, *$r1, convert_any_e, dst);
+ }
+ | CONVERT '(' varg[r1] HEX BYTE ')' {
+ location_set(@1);
+ $$ = new_alphanumeric("CONVERT");
+ parser_intrinsic_convert($$, *$r1, convert_hex_e, convert_byte_e);
+ }
+ | CONVERT '(' varg[r1] convert_alpha[src] convert_nat[dst] ')' {
+ location_set(@1);
+ $$ = new_alphanumeric("CONVERT", current_encoding('N'));
+ auto src = convert_type_t($src);
+ parser_intrinsic_convert($$, *$r1, src, $dst);
+ }
+ | CONVERT '(' varg[r1] convert_alpha[src] convert_alpha[dst] convert_hex[hex] ')' {
+ location_set(@1);
+ $$ = new_alphanumeric("CONVERT");
+ auto src = convert_type_t($src);
+ unsigned int dst = ($dst | $hex );
+ parser_intrinsic_convert($$, *$r1, src, dst);
+ }
+ | CONVERT '(' varg[r1] convert_nat[src] convert_alpha[dst] ')' {
+ location_set(@1);
+ $$ = new_alphanumeric("CONVERT");
+ auto src = convert_type_t($src);
+ parser_intrinsic_convert($$, *$r1, src, $dst);
+ }
+ | CONVERT '(' varg[r1] convert_nat[src] convert_nat[dst] convert_hex[hex] ')' {
location_set(@1);
- $$ = new_alphanumeric(1,"CONVERT");
- cbl_unimplemented("CONVERT");
- /* if( ! intrinsic_call_3($$, CONVERT, $r1, $src, $dst) ) YYERROR; */
+ $$ = new_alphanumeric("CONVERT", current_encoding('N'));
+ auto src = convert_type_t($src);
+ unsigned int dst = ($dst | $hex );
+ parser_intrinsic_convert($$, *$r1, src, dst);
}
| DISPLAY_OF '(' varg[r1] ')' {
parser_exception_file( $$, $filename );
}
- | FIND_STRING '(' varg[r1] last start_after anycase ')' {
+ /* FIND-STRING argument-1 argument-2
+ * [LAST] [[START AFTER] argument-3] [ANYCASE] */
+ | FIND_STRING '(' varg[r1] varg[r2] last start_after[after] anycase ')' {
location_set(@1);
- $$ = new_alphanumeric("FIND-STRING");
- /* auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e)); */
- cbl_unimplemented("%<FIND_STRING%>");
- /* if( ! intrinsic_call_4($$, FIND_STRING, r1, $r2) ) YYERROR; */
+ $$ = new_tempnumeric("FIND-STRING");
+ if( $after && ! is_numeric($after->field) ) {
+ error_msg(@after, "START AFTER %qs must be numeric", $after->name());
+ YYERROR;
+ }
+ parser_intrinsic_find_string($$, *$r1, *$r2, $after, $last, $anycase);
}
| FORMATTED_DATE '(' DATE_FMT[r1] expr[r2] ')' {
location_set(@1);
$$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATE, "FORMATTED-DATE");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, FORMATTED_DATE, r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-DATETIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
static cbl_refer_t r3(literally_zero);
if( ! intrinsic_call_4($$, FORMATTED_DATETIME,
r1, $r2, $r3, &r3) ) YYERROR;
location_set(@1);
$$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-DATETIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_4($$, FORMATTED_DATETIME,
r1, $r2, $r3, $r4) ) YYERROR;
}
location_set(@1);
$$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME, "FORMATTED-DATETIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_3($$, FORMATTED_TIME,
r1, $r2, $r3) ) YYERROR;
}
| FORMATTED_TIME '(' TIME_FMT[r1] expr[r2] ')' {
location_set(@1);
$$ = new_alphanumeric(MAXLENGTH_FORMATTED_TIME, "FORMATTED-TIME");
- auto r3 = new_reference(new_literal("0"));
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ auto r3 = new_reference(new_constant("0"));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_3($$, FORMATTED_TIME,
r1, $r2, r3) ) YYERROR;
}
location_set(@1);
$$ = new_alphanumeric(MAXLENGTH_FORMATTED_DATETIME, "FORMATTED-CURRENT_DATE");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_1($$, FORMATTED_CURRENT_DATE, r1, @r1) )
YYERROR;
}
| TEST_FORMATTED_DATETIME '(' DATE_FMT[r1] varg[r2] ')' {
- location_set(@1);
- $$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
+ location_set(@1);
+ $$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME,
r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME,
r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_tempnumeric("TEST-FORMATTED-DATETIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, TEST_FORMATTED_DATETIME,
r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_tempnumeric("INTEGER-OF-FORMATTED-DATE");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, INTEGER_OF_FORMATTED_DATE,
r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_tempnumeric("INTEGER-OF-FORMATTED-DATE");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, INTEGER_OF_FORMATTED_DATE,
r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_tempnumeric("SECONDS-FROM-FORMATTED-TIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, SECONDS_FROM_FORMATTED_TIME,
r1, $r2) ) YYERROR;
}
location_set(@1);
$$ = new_tempnumeric("SECONDS-FROM-FORMATTED-TIME");
auto r1 = new_reference(new_literal(strlen($r1), $r1, quoted_e));
+ symbol_temporary_location(r1->field, @r1);
if( ! intrinsic_call_2($$, SECONDS_FROM_FORMATTED_TIME,
r1, $r2) ) YYERROR;
}
| LENGTH '(' varg1a[val] ')' {
location_set(@1);
$$ = new_tempnumeric("LENGTH", none_e);
- parser_set_numeric($$, $val->field->data.capacity);
+ parser_set_numeric($$, $val->field->data.capacity());
if( ! intrinsic_call_1($$, LENGTH, $val, @val)) YYERROR;
}
| lopper_case[func] '(' alpha_val[r1] ')' {
location_set(@1);
- $$ = new_alphanumeric($r1->field->data.capacity, "lopper_case[func]");
+ $$ = new_alphanumeric($r1->field->data.capacity(), "lopper_case[func]");
if( ! intrinsic_call_1($$, $func, $r1, @r1)) YYERROR;
}
{
location_set(@1);
static auto r2 = new_reference(FldNumericDisplay, "50");
- static auto one = new cbl_refer_t( new_literal("1") );
- static auto four = new cbl_refer_t( new_literal("4") );
+ static auto one = new cbl_refer_t( new_constant("1") );
+ static auto four = new cbl_refer_t( new_constant("4") );
cbl_span_t year(one, four);
auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
r3->refmod = year;
| DATE_TO_YYYYMMDD '(' expr[r1] expr[r2] ')'
{
location_set(@1);
- static auto one = new cbl_refer_t( new_literal("1") );
- static auto four = new cbl_refer_t( new_literal("4") );
+ static auto one = new cbl_refer_t( new_constant("1") );
+ static auto four = new cbl_refer_t( new_constant("4") );
cbl_span_t year(one, four);
auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
r3->refmod = year;
{
location_set(@1);
static auto r2 = new_reference(FldNumericDisplay, "50");
- static auto one = new cbl_refer_t( new_literal("1") );
- static auto four = new cbl_refer_t( new_literal("4") );
+ static auto one = new cbl_refer_t( new_constant("1") );
+ static auto four = new cbl_refer_t( new_constant("4") );
cbl_span_t year(one, four);
auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
r3->refmod = year;
| DAY_TO_YYYYDDD '(' expr[r1] expr[r2] ')'
{
location_set(@1);
- static auto one = new cbl_refer_t( new_literal("1") );
- static auto four = new cbl_refer_t( new_literal("4") );
+ static auto one = new cbl_refer_t( new_constant("1") );
+ static auto four = new cbl_refer_t( new_constant("4") );
cbl_span_t year(one, four);
auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
r3->refmod = year;
| YEAR_TO_YYYY '(' expr[r1] ')'
{
location_set(@1);
- static auto r2 = new_reference(new_literal("50", decimal_e));
- static auto one = new cbl_refer_t( new_literal("1") );
- static auto four = new cbl_refer_t( new_literal("4") );
+ static auto r2 = new_reference(new_constant("50"));
+ static auto one = new cbl_refer_t( new_constant("1") );
+ static auto four = new cbl_refer_t( new_constant("4") );
cbl_span_t year(one, four);
auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
r3->refmod = year;
| YEAR_TO_YYYY '(' expr[r1] expr[r2] ')'
{
location_set(@1);
- static auto one = new cbl_refer_t( new_literal("1") );
- static auto four = new cbl_refer_t( new_literal("4") );
+ static auto one = new cbl_refer_t( new_constant("1") );
+ static auto four = new cbl_refer_t( new_constant("4") );
cbl_span_t year(one, four);
auto r3 = new_reference(new_alphanumeric(MAXLENGTH_CALENDAR_DATE));
r3->refmod = year;
| TOP_LEVEL { $$ = module_toplevel_e; }
;
-convert_src: ANY
- | HEX
- | convert_fmt
+//convert_fmt: convert_alpha
+// | convert_nat
+// ;
+convert_alpha: ALPHANUMERIC { $$ = convert_alpha_e; }
+ | ANUM { $$ = convert_alpha_e; }
;
-convert_dst: convert_fmt HEX
- | BYTE
- ;
-convert_fmt: ALPHANUMERIC
- | ANUM
- | NAT
- | NATIONAL
+convert_nat: NAT { $$ = convert_nat_e; }
+ | NATIONAL { $$ = convert_nat_e; }
;
+convert_hex: HEX { $$ = convert_hex_e; }
+ | HEX JUSTIFIED { $$ = convert_just_e; }
+ | HEX JUSTIFIED RIGHT { $$ = convert_rjust_e; }
+ ;
numval_locale: %empty {
$$.is_locale = false;
{
location_set(@1);
$$ = new_alphanumeric();
- cbl_refer_t locale(new_literal($locale_name));
+ cbl_refer_t locale(new_literal(@locale_name, $locale_name));
if( ! intrinsic_call_3($$, LOCALE_COMPARE, $r1, $r2, &locale) ) YYERROR;
}
| UPPER_CASE { $$ = UPPER_CASE; }
;
-trim_trailing: %empty { $$ = new_literal("0"); } // Remove both
- | LEADING { $$ = new_literal("1"); } // Remove leading spaces
- | TRAILING { $$ = new_literal("2"); } // Remove trailing spaces
+trim_trailing: %empty { $$ = new_constant("0"); } // Remove both
+ | LEADING { $$ = new_constant("1"); } // Remove leading spaces
+ | TRAILING { $$ = new_constant("2"); } // Remove trailing spaces
;
intrinsic0: CURRENT_DATE {
location_set(@1);
// Returns YYYYMMDDhhmmssss-0500)
$$ = new_alphanumeric(MAXLENGTH_CALENDAR_DATE, "WHEN-COMPILED");
- parser_intrinsic_call_0( $$, "__gg__when_compiled" );
+ parser_intrinsic_call_0( $$, "__gg__when_compiled" );
}
;
| KEY
;
-last: %empty %prec LAST
- | LAST
+last: %empty { $$ = false; } %prec LAST
+ | LAST { $$ = true; }
;
lines: %empty
| UNSIGNED_kw { $$ = false; }
;
-start_after: %empty %prec AFTER
- | START AFTER varg
+start_after: %empty { $$ = nullptr; } %prec AFTER
+ | START AFTER start_pos { $$ = $start_pos; }
+ | start_pos { $$ = $start_pos; }
;
+start_pos: num_constant { $$ = new_reference($1); }
+ | scalar
+ ;
status: %empty
| STATUS
xmlgen_nameof xmlgen_typeof xmlgen_suppress
;
-xmlgen_count: %empty
- | COUNT in name[id3]
+xmlgen_count: %empty { $$ = nullptr; }
+ | COUNT in name[id3] { $$ = $id3; }
;
-xmlgen_decl: %empty
- | with XML_DECLARATION with ATTRIBUTES
+xmlgen_decl: %empty { $$ = {}; }
+ | with XML_DECLARATION with ATTRIBUTES { $$ = {true, true}; }
+ | with XML_DECLARATION { $$ = {true, false}; }
+ | with ATTRIBUTES { $$ = {false, true}; }
;
xmlgen_namespace:
- %empty
+ %empty
| NAMESPACE is name[id4] namespace_prefix
;
namespace_prefix:
- %empty
- | NAMESPACE_PREFIX is namestr[id5]
+ %empty
+ | NAMESPACE_PREFIX is name[id5]
+ | NAMESPACE_PREFIX is LITERAL[id5]
;
xmlgen_nameof: %empty
| NAME of xmlgen_ids
}
static inline cbl_field_t *
-new_literal( const char initial[], enum radix_t radix ) {
+new_literal( const cbl_loc_t loc, const char initial[], enum radix_t radix ) {
auto attr = constant_e;
switch( radix ) {
attr = bool_encoded_e;
break;
}
- return new_literal(strlen(initial), initial,
- cbl_field_attr_t(constant_e | attr));
+ auto field = new_literal(strlen(initial), initial,
+ cbl_field_attr_t(constant_e | attr));
+ symbol_temporary_location(field, loc);
+ return field;
}
class is_elementary_type { // for INITIALIZE purposes
};
-static const uint32_t * initial_default_value;
- const uint32_t * wsclear() { return initial_default_value; }
+static const uint32_t *initial_default_value;
+const uint32_t *
+wsclear() {
+ return initial_default_value;
+}
void
wsclear( uint32_t i ) {
char ach[5];
int v = *s;
sprintf(ach, "%d", v);
- source.field = new_literal(ach);
+ source.field = new_constant(ach);
source.addr_of = true;
}
if( value_category == data_category_all ||
value_category == data_category_of(tgt) ) {
// apply any applicable VALUE
- if( explicitly || tgt.field->data.initial ) {
+ if( explicitly || tgt.field->data.original() ) {
assert( with_filler || !tgt.field->has_attr(filler_e) );
- if( tgt.field->data.initial ) {
+ if( tgt.field->data.original() ) {
parser_initialize(tgt);
}
}
size_t first, second;
first = second = group_offset(span.first);
if( ! span.second ) {
- second += std::max(span.first->data.capacity,
+ second += std::max(span.first->data.capacity(),
span.first->data.memsize);
} else {
second = group_offset(span.second)
- group_offset(span.first);
- second += std::max(span.second->data.capacity,
+ second += std::max(span.second->data.capacity(),
span.second->data.memsize);
}
return std::make_pair(first, second);
}
cbl_field_t *
-new_literal( const literal_t& lit, enum cbl_field_attr_t attr ) {
+new_literal( const cbl_loc_t loc, const literal_t& lit, enum cbl_field_attr_t attr ) {
bool zstring = lit.prefix[0] == 'Z';
if( !zstring && lit.data[lit.len] != '\0' ) {
dbgmsg("%s:%d: line %d, no NUL terminator '%-*.*s'{"
attrs |= constant_e;
attrs |= literal_attr(lit.prefix);
- return new_literal(lit.len, lit.data, cbl_field_attr_t(attrs), lit.encoding);
+ auto field = new_literal(lit.len, lit.data, cbl_field_attr_t(attrs), lit.encoding);
+ symbol_temporary_location(field, loc);
+ return field;
}
+
bool
cbl_file_t::validate_forward( size_t isym ) const {
if( isym > 0 && FldForward == symbol_field_forward(isym)->type ) {
assert(is_literal(field));
set_prefix( "", 0 );
- set_data( field->data.capacity,
+ set_data( field->data.capacity(),
const_cast<char*>(field->data.initial),
field_index(field) );
return *this;
cbl_field_t::value_str() const {
if( data.etc_type == cbl_field_data_t::value_e )
return string_of( data.value_of() );
- return "???";
+ return data.etc_type_str();
}
static const cbl_division_t not_syntax_only = cbl_division_t(-1);
static bool internal_ebcdic_locked = false;
void internal_ebcdic_lock() {
- internal_ebcdic_locked = true;
+//// internal_ebcdic_locked = true;
}
void internal_ebcdic_unlock() {
internal_ebcdic_locked = false;
bool
cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature, bool on ) {
- if( gcobol_feature == feature_internal_ebcdic_e ) {
- if( internal_ebcdic_locked ) return false;
- if( ! on ) gcc_unreachable();
- current.default_encoding.set(EBCDIC_e);
- }
if( on ) {
cbl_gcobol_features |= gcobol_feature;
} else {
literal_refmod_valid( YYLTYPE loc, const cbl_refer_t& r ) {
if( r.field->has_attr(any_length_e) ) return true;
+ unsigned int nchar = r.field->char_capacity();
+
const cbl_span_t& refmod(r.refmod);
if( ! is_literal(refmod.from->field) ) {
if( ! is_literal(refmod.len->field) ) return true;
auto edge = refmod.len->field->as_integer();
if( 0 < edge ) {
- if( edge-1 < r.field->data.capacity ) return true;
+ if( edge-1 < nchar ) return true;
}
// len < 0 or not: 0 < from + len <= capacity
error_msg(loc, "%s(%s:%zu) out of bounds, "
r.field->name,
refmod.from->name(),
size_t(edge),
- static_cast<unsigned int>(r.field->data.capacity) );
+ nchar );
return false;
}
auto edge = refmod.from->field->as_integer();
if( edge > 0 ) {
- if( --edge < r.field->data.capacity ) {
+ if( --edge < nchar ) {
if( ! refmod.len ) return true;
if( ! is_literal(refmod.len->field) ) return true;
auto len = refmod.len->field->as_integer();
if( len > 0 ) {
edge += len;
- if( --edge < r.field->data.capacity ) return true;
+ if( --edge < nchar ) return true;
}
// len < 0 or not: 0 < from + len <= capacity
loc = symbol_field_location(field_index(r.field));
r.field->name,
size_t(refmod.from->field->as_integer()),
size_t(len),
- static_cast<unsigned int>(r.field->data.capacity) );
+ nchar );
return false;
}
}
error_msg(loc,"%s(%zu) out of bounds, size is %u",
r.field->name,
size_t(refmod.from->field->as_integer()),
- static_cast<unsigned int>(r.field->data.capacity) );
+ nchar );
return false;
}
return result;
}
+/*
+ * Define a binary field according to USAGE: attr, type, and capacity.
+ * Return type.
+ * Do not set initial value; that is up to PICTURE and VALUE.
+ */
+static cbl_field_type_t
+field_binary_usage( YYLTYPE loc, cbl_field_t *field,
+ cbl_field_type_t type, uint32_t capacity,
+ bool signable )
+{
+ bool infer = true;
+
+ // Some binary types have defined capacity;
+ switch(type) {
+ // COMPUTATIONAL and COMP-5 rely on PICTURE.
+ case FldNumericBinary:
+ field->attr |= big_endian_e;
+ __attribute__((fallthrough));
+ case FldNumericBin5:
+ // If no capacity yet, then no picture, infer $comp.capacity.
+ // If field has capacity, ensure USAGE is compatible.
+ if( field->data.capacity() > 0 ) { // PICTURE before USAGE
+ infer = false;
+ auto true_capacity = field->data.capacity();
+
+ switch( field->type ) {
+ case FldAlphanumeric: // PIC X COMP-5 or COMP-X
+ assert( field->data.digits == 0 );
+ assert( field->data.rdigits == 0 );
+ dialect_ok(loc, MfCompX, "alphanumeric PICTURE with numeric USAGE");
+ symbol_field_type_update(field, type, true);
+ field->clear_attr(signable_e);
+ // In case PIC X for UTF-16 (say), the X's represent digits.
+ true_capacity /= field->codeset.stride();
+ field->data.capacity(true_capacity);
+ break;
+ case FldNumericDisplay: // PIC 9 COMP-5 or COMP-X
+ if( capacity == 0xFF ) { // comp-x is a bit like comp-5
+ assert( field->data.digits == field->data.capacity() );
+ dialect_ok(loc, MfCompX, "alphanumeric PICTURE with numeric USAGE");
+ }
+ symbol_field_type_update(field, type, true);
+ capacity = type_capacity(field->type, field->data.digits);
+ field->data.capacity(capacity);
+ field->blank_initial(capacity / field->codeset.stride());
+ break;
+ case FldInvalid:
+ symbol_field_type_update(field, type, true);
+ field->data.capacity(capacity);
+ if( signable ) field->set_attr(signable_e);
+ gcc_assert(field->type == FldNumericBin5);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case FldPacked: // comp-6 is unsigned comp-3
+ assert(! signable); // else PACKED_DECIMAL from scanner
+ field->set_attr(separate_e);
+ dialect_ok(loc, MfComp6, "COMP-6");
+ if( symbol_field_type_update(field, type, true) ) {
+ infer = false;
+ field->set_capacity(type_capacity(field->type, field->data.digits));
+ } else {
+ error_msg(loc, "USAGE PACKED DECIMAL conflicts with PICTURE");
+ }
+ break;
+ default:
+ break;
+ }
+
+ if( infer ) {
+ symbol_field_type_update(field, type, true);
+ if( capacity > 0 ) {
+ if( field->data.capacity() > 0 ) {
+ error_msg(loc, "%s is BINARY type, incompatible with PICTURE",
+ field->name);
+ }
+ if( signable ) {
+ field->set_attr(signable_e);
+ }
+ field->set_capacity(capacity);
+ }
+ }
+
+ return field->type;
+}
#include <stack>
#include <string>
-#define MAXLENGTH_FORMATTED_DATE 10
-#define MAXLENGTH_FORMATTED_TIME 19
-#define MAXLENGTH_CALENDAR_DATE 21
-#define MAXLENGTH_FORMATTED_DATETIME 30
+#define MAXLENGTH_FORMATTED_DATE (10*4)
+#define MAXLENGTH_FORMATTED_TIME (19*4)
+#define MAXLENGTH_CALENDAR_DATE (21*4)
+#define MAXLENGTH_FORMATTED_DATETIME (30*4)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
static const std::set<std::string> compat_programs {
"CBL_ALLOC_MEM",
"CBL_CHECK_FILE_EXIST",
+ "CBL_CLOSE_FILE",
"CBL_DELETE_FILE",
"CBL_FREE_MEM",
+ "CBL_GET_PROGRAM_INFO",
+ "CBL_OPEN_FILE",
+ "CBL_READ_FILE",
+ "CBL_WRITE_FILE",
};
-const char *
-consistent_encoding_check( const YYLTYPE& loc, const char input[] ) {
- cbl_field_t faux = {};
- faux.type = FldAlphanumeric;
- faux.data.capacity = capacity_cast(strlen(input));
- faux.data.initial = input;
-
- auto s = faux.internalize();
- if( !s ) {
- error_msg(loc, "inconsistent string literal encoding for '%s'", input);
- } else {
- if( s != input ) return s;
- }
- return NULL;
-}
-
const char * original_picture();
char * original_number( char input[] = NULL );
typedef_clause_e = 0x8000,
};
+static std::map<data_clause_t,cbl_loc_t> data_clause_locations;
+
static inline bool
has_clause( int data_clauses, data_clause_t clause ) {
return clause == (data_clauses & clause);
cbl_field_t *
new_alphanumeric( size_t capacity = MAXIMUM_ALPHA_LENGTH,
- const cbl_name_t name = nullptr );
+ const cbl_name_t name = nullptr,
+ cbl_encoding_t encoding = no_encoding_e );
static inline cbl_field_t *
-new_alphanumeric( const cbl_name_t name ) {
- return new_alphanumeric(MAXIMUM_ALPHA_LENGTH, name);
+new_alphanumeric( const cbl_name_t name, cbl_encoding_t encoding = no_encoding_e ) {
+ return new_alphanumeric(MAXIMUM_ALPHA_LENGTH, name, encoding);
}
static inline cbl_refer_t *
static const char *
name_of( cbl_field_t *field ) {
assert(field);
- if( field->data.initial == nullptr ) {
- return field->name;
+ if( field->name[0] == '_' && field->data.initial ) {
+ return field->data.original()? field->data.original() : field->data.initial;
}
- return field->name[0] == '_' && field->data.initial?
- field->data.original() : field->name;
+ return field->name;
}
static const char *
bool decide( relop_t op, const cbl_refer_t& object, bool invert ) {
if( pcol == columns.end() ) return false;
dbgmsg("%s() if not %s goto %s", __func__, result->name, when()->name);
-
+
if( compare(op, object, true) ) {
if( invert ) {
parser_logop( result, NULL, not_op, result );
list.push_back(arg);
}
-static list<cbl_domain_t> domains;
-typedef list<cbl_domain_t>::iterator domain_iter;
+struct domain_t : public cbl_domain_t {
+ cbl_encoding_t encoding;
+ domain_t( cbl_encoding_t encoding, const cbl_domain_t& domain )
+ : cbl_domain_t(domain)
+ , encoding(encoding)
+ {}
+ explicit domain_t( const cbl_domain_t& domain )
+ : cbl_domain_t(domain)
+ , encoding( current_encoding('A') )
+ {}
+ bool encoding_ok( cbl_encoding_t enc ) const {
+ return enc == encoding
+ || enc == no_encoding_e
+ || encoding == no_encoding_e
+ || first.is_numeric
+ || last.is_numeric;
+ }
+};
+
+static list<domain_t> domains;
/*
* The name queue is a queue of lists of data-item names recognized by the
std::copy( that.files.begin(), that.files.end(), files.begin() );
}
- static size_t symbol_index( cbl_file_t* file ) {
- return ::symbol_index( symbol_elem_of(file) );
+ static uint64_t symbol_index( cbl_file_t* file ) {
+ uint64_t retval = symbol_unique_index(symbol_elem_of(file));
+ return retval;
}
};
cbl_label_t *declaratives_eval, *paragraph, *section;
const char *collating_sequence;
struct encoding_t {
+ friend bool cobol_gcobol_feature_set( cbl_gcobol_feature_t gcobol_feature,
+ bool on );
struct encoding_base_t {
size_t isym;
cbl_encoding_t encoding;
- encoding_base_t() : isym(0), encoding(CP1252_e) {}
+ encoding_base_t() : isym(0), encoding(custom_encoding_e) {}
encoding_base_t(cbl_encoding_t encoding) : isym(0), encoding(encoding) {}
void set( size_t isym, cbl_encoding_t encoding ) {
this->isym = isym;
this->isym = 0;
this->encoding = encoding;
}
-
} alpha, national;
- encoding_t() : national(EBCDIC_e) {}
+
+ encoding_t() : alpha(alpha_default()), national(national_default()) {}
+
+ bool sizes_ok() const {
+ charmap_t * alp = __gg__get_charmap(alpha.encoding);
+ charmap_t * nat = __gg__get_charmap(national.encoding);
+ return alp->stride() <= nat->stride();
+ }
+
+ protected:
+ /*
+ * Use static default encodings
+ */
+ static cbl_encoding_t alpha_default() {
+ return cbl_field_t::codeset_t::default_encodings.alpha.type;
+ }
+ static cbl_encoding_t national_default() {
+ return cbl_field_t::codeset_t::default_encodings.national.type;
+ }
+
+ // Set static default alpha encoding.
+ // Called only by above friend function in support of ebcdic.
+ static void alpha_default( cbl_encoding_t encoding) {
+ auto alpha = __gg__encoding_iconv_descr(encoding);
+ gcc_assert(alpha);
+ cbl_field_t::codeset_t::default_encodings.alpha = *alpha;
+ }
} alphabet;
-
+
bool locale_add( const cbl_locale_t& locale ) {
auto e = symbol_locale_add(program_index, &locale);
assert(e);
auto p = locales.insert(locale);
return p.second;
}
-
+
cbl_options_t options;
explicit prog_descr_t( size_t isymbol )
rel_part_t antecedent_cache;
public:
- static prog_descr_t::encoding_t::encoding_base_t default_encoding;
-
current_t()
: first_statement(0)
, in_declaratives(false)
tree ena, dcl;
runtime_t() : ena(nullptr), dcl(nullptr) {}
} runtime;
-
+
bool empty() const {
return declaratives_list_t::empty();
}
} );
}
- std::vector<uint64_t>
+ std::vector<uint64_t>
encode() const {
std::vector<uint64_t> encoded;
auto p = std::back_inserter(encoded);
return programs.top().locale_add(locale);
}
+ static inline const char *
+ cbl_encoding_str( cbl_encoding_t encoding ) {
+ return __gg__encoding_iconv_name(encoding);
+ }
+
bool new_program ( const YYLTYPE& loc, cbl_label_type_t type,
const char name[], const char os_name[],
bool common, bool initial )
const cbl_label_t *L;
if( (L = symbol_program_add(parent, &label)) == NULL ) return false;
prog_descr_t program(symbol_index(symbol_elem_of(L)));
-#if 1 //EBCDIC // enable when ready
- auto alpha_encoding =
- programs.empty()? default_encoding : programs.top().alphabet.alpha;
- if( alpha_encoding.encoding == EBCDIC_e ) {
- dbgmsg("%s:%d: We're in EBCDIC", __func__, __LINE__);
- }
- program.alphabet.alpha = alpha_encoding;
-#endif
+ auto encoding = current_encoding('A');
+ if( encoding == EBCDIC_e ) {
+ dbgmsg("%s:%d: We're in EBCDIC", __func__, __LINE__);
+ }
+ program.alphabet.alpha = encoding;
+ program.alphabet.national = current_encoding('N');
+
+ if( ! program.alphabet.sizes_ok() ) {
+ error_msg(loc, "Alphanumeric encoding %qs "
+ "cannot be wider than National encoding %qs",
+ cbl_encoding_str(encoding),
+ cbl_encoding_str(program.alphabet.national.encoding));
+ }
+
programs.push( program );
programs.apply_pending();
// assembly language.
static int eval_count = 1;
char eval[32], lave[32];
-
+
sprintf(eval, "_DECLARATIVES_EVAL%d", eval_count);
sprintf(lave, "_DECLARATIVES_LAVE%d", eval_count++);
ast_enter_section(eval_label);
declarative_runtime_match(declaratives.as_list(), lave_label);
-
+
parser_label_label(lave_label);
-
+
return lave_label;
}
std::swap( programs.top().section, section );
return section;
}
-
+
ec_type_t ec_type_of( file_status_t status ) {
static std::vector<ec_type_t> ec_by_status {
/* 0 */ ec_none_e, // ec_io_warning_e if low byte is nonzero
- /* 1 */ ec_io_at_end_e,
+ /* 1 */ ec_io_at_end_e,
/* 2 */ ec_io_invalid_key_e,
/* 3 */ ec_io_permanent_error_e,
/* 4 */ ec_io_logic_error_e,
* To indicate to the runtime-match function that we want to evaluate
* only the exception condition, unrelated to a file, we set the
* file register to 0 and the handled-exception register to the
- * handled exception condition.
+ * handled exception condition.
*
* declaratives_execute performs the "declarative ladder" produced
* by declaratives_runtime_match. That section CALLs the
cbl_label_t * compute_label() { return error_labels.compute_error; }
} current;
-prog_descr_t::encoding_t::encoding_base_t current_t::default_encoding;
-
void current_enabled_ecs( tree ena ) {
current.declaratives.runtime.ena = ena;
}
cbl_encoding_t
current_encoding( char a_or_n ) {
- cbl_encoding_t retval;
+ cbl_encoding_t encoding;
switch(a_or_n) {
case 'A':
- retval = current.alpha_encoding();
+ encoding = cbl_field_t::codeset_t::default_encodings.alpha.type;
+ if( current.program() )
+ encoding = current.alpha_encoding();
break;
case 'N':
- retval = current.national_encoding();
+ encoding = cbl_field_t::codeset_t::default_encodings.national.type;
+ if( current.program() )
+ encoding = current.national_encoding();
break;
default:
gcc_unreachable();
break;
}
- return retval;
+ return encoding;
}
size_t
static inline cbl_field_t *
new_tempnumeric(const cbl_name_t name = nullptr, cbl_field_attr_t attr = signable_e ) {
- return new_temporary(FldNumericBin5, name, attr == signable_e);
+ return new_temporary(FldNumericBin5, name, attr);
}
static inline cbl_field_t *
new_tempnumeric(const cbl_field_attr_t attr ) {
- return new_temporary(FldNumericBin5, nullptr, attr == signable_e);
+ return new_temporary(FldNumericBin5, nullptr, attr);
}
static inline cbl_field_t *
static bool
is_integer_literal( const cbl_field_t *field ) {
if( field->type == FldLiteralN ) {
- size_t nchar;
- const char *initial = __gg__iconverter(field->codeset.encoding,
- DEFAULT_SOURCE_ENCODING,
- field->data.initial,
- strlen(field->data.initial),
- &nchar);
- assert(strlen(initial) == nchar);
+ const char *initial = field->data.original();
switch( *initial ) {
case '-': case '+': ++initial;
}
const char *eos = initial + strlen(initial);
auto p = std::find_if_not( initial, eos, fisdigit );
if( p == eos ) return true;
-
+
if( *p++ == symbol_decimal_point() ) {
switch( *p++ ) {
case 'E': case 'e':
*/
static inline cbl_field_t *
-new_literal( const char initial[] ) {
+new_constant( const char initial[] ) {
return new_literal( strlen(initial), initial );
}
+static inline cbl_field_t *
+new_literal( const cbl_loc_t loc, const char initial[] ) {
+ auto field = new_constant(initial);
+ symbol_temporary_location(field, loc);
+ return field;
+}
cbl_refer_t *
negate( cbl_refer_t * refer, bool neg = true ) {
cbl_field_t *
conditional_set( cbl_field_t *tgt, bool tf ) {
- static cbl_field_t *one = new_literal("1");
+ static cbl_field_t *one = new_constant("1");
enum relop_t op = tf? eq_op : ne_op;
parser_relop( tgt, one, op, one );
}
static inline cbl_field_t *
-field_find( const std::list<const char *>& names ) {
+field_find( cbl_loc_t loc, const std::list<const char *>& names ) {
if( names.size() == 1 ) {
auto value = cdf_value(names.front());
if( value ) {
field = new_tempnumeric();
parser_set_numeric(field, value->as_number());
} else {
- field = new_literal(value->string);
+ field = new_literal(loc, value->string);
}
return field;
}
dbgmsg( "size error redef: %s", field_str(field) );
error_msg(loc, "%s (%s size %u) larger than REDEFINES %s (%s size %u)",
field->name,
- 3 + cbl_field_type_str(field->type), field->size(),
+ 3 + cbl_field_type_str(field->type),
+ field->size()/field->codeset.stride(),
orig->name,
- 3 + cbl_field_type_str(orig->type), orig->size() );
+ 3 + cbl_field_type_str(orig->type),
+ orig->size()/field->codeset.stride() );
}
}
}
return true;
}
+#if 0
static void
field_value_all(struct cbl_field_t * field ) {
// Expand initial by repeating its contents until it is of length capacity:
assert(field->data.initial != NULL);
size_t initial_length = strlen(field->data.initial);
- char *new_initial = static_cast<char*>(xmalloc(field->data.capacity + 1));
+ char *new_initial =
+ static_cast<char*>(xmalloc(field->data.capacity()/
+ field->codeset.stride() + 1));
size_t i = 0;
- while(i < field->data.capacity) {
+
+ while(i < field->data.capacity()/field->codeset.stride()) {
new_initial[i] = field->data.initial[i%initial_length];
i += 1;
}
- new_initial[field->data.capacity] = '\0';
+ new_initial[field->data.capacity()/field->codeset.stride()] = '\0';
free(const_cast<char *>(field->data.initial));
field->data.initial = new_initial;
}
+#endif
+
+static cbl_field_t *
+parent_has_picture( cbl_field_t *field ) {
+ while( (field = parent_of(field)) != NULL ) {
+ if( symbol_redefines(field) ) return nullptr;
+ if( field->data.initial ) break; // initial create by PICTURE clause, usually
+ }
+ return field;
+}
static cbl_field_t *
parent_has_value( cbl_field_t *field ) {
while( (field = parent_of(field)) != NULL ) {
- if( field->data.initial ) break;
+ if( symbol_redefines(field) ) return nullptr;
+ if( field->data.original() ) break;
}
return field;
}
uint32_t parent_capacity = 0;
if( field->parent ) {
auto e = symbol_at(field->parent);
- if( e->type == SymField ) parent_capacity = cbl_field_of(e)->data.capacity;
- }
- /*
- * Field may become a table whose capacity was inherited from a parent with
- * data. If so, the field's capacity will be overwritten by its
- * PICTURE-defined size.
- */
- if( parent_capacity < field->data.capacity && !symbol_redefines(field) ) {
- dbgmsg( "%s: %s", __func__, field_str(field) );
- error_msg(loc, "%s has USAGE incompatible with PICTURE",
- field->name );
- return true;
+ if( e->type == SymField ) parent_capacity = cbl_field_of(e)->data.capacity();
+ /*
+ * Field may become a table whose capacity was inherited from a parent with
+ * data. If so, the field's capacity will be overwritten by its
+ * PICTURE-defined size.
+ */
+ if( parent_capacity < field->data.capacity() && !symbol_redefines(field) ) {
+ dbgmsg( "%s: %s", __func__, field_str(field) );
+ error_msg(loc, "%s has USAGE incompatible with PICTURE",
+ field->name );
+ return true;
+ }
}
return false;
}
#define ERROR_IF_CAPACITY(L, F) \
do { if( field_capacity_error(L, F) ) YYERROR; } while(0)
-static const char *
-blank_pad_initial( const char initial[], size_t capacity, size_t new_size ) {
- assert(capacity < new_size);
- assert(initial != NULL);
-
- if( normal_value_e != cbl_figconst_of(initial) ) return initial;
+template <typename T>
+static void
+blankit( T* beg, size_t n, T ch ) {
+ std::fill(beg, beg + n, ch);
+}
- auto p = reinterpret_cast<char *>( xmalloc(2 + new_size) );
- memset(p, 0x20, new_size);
- memcpy(p, initial, capacity);
- p[new_size] = '\0'; // for debugging
- p[++new_size] = '\0'; // for debugging
- return p;
+/*
+ * Normally blank_initial takes just a length argument and initializes
+ * data.initial to all blanks according to the field's encoding. Optionally it
+ * applies a figurative constant and uses that instead.
+ */
+void
+cbl_field_t::blank_initial( size_t nchar, cbl_figconst_t figconst ) {
+ charmap_t *charmap = __gg__get_charmap(codeset.encoding);
+ cbl_char_t space_char = figconst == normal_value_e?
+ charmap->mapped_character(ascii_space)
+ : charmap->figconst_character(figconst);
+
+ size_t nbyte = nchar * codeset.stride();
+ char *init = static_cast<char *>(xmalloc(nbyte+4));
+ char *enit = init + nbyte;
+ std::fill(enit, enit + 4, '\0'); // append for NULs
+
+ switch(codeset.stride()) {
+ case 1:
+ blankit( reinterpret_cast<uint8_t*>(init), nchar, uint8_t(space_char) );
+ break;
+ case 2:
+ blankit( reinterpret_cast<uint16_t*>(init), nchar, uint16_t(space_char) );
+ break;
+ case 4:
+ blankit( reinterpret_cast<uint32_t*>(init), nchar, uint32_t(space_char) );
+ break;
+ default:
+ gcc_unreachable();
+ }
+ data.initial = init;
}
/*
- * When cbl_field_t::internalize is called, its data.initial value has been
- * set, but nothing has been done to it. It is encoded according to the source
- * code. internalize() converts data.initial to the field's encoding.
- *
- * If syntax used was was PIC VALUE, in that order, then PIC set the field's
- * encoding, and the VALUE clause can verify that its encoding matches. If the
- * order was VALUE PIC, the value leaves the encoding uninitialized unless the
- * value string bore an encoding prefix. When PIC is processed, codeset_t::set
- * allows it to set the encoding only if it's either uninitialized, or the PIC
- * encoding matches the existing one set by VALUE. In no event does one
- * override the other; they must agree.
- *
- * internalize() fails if data.initial cannot be converted to the field's
- * encoding.
+ * When called, data.nbyte, if nonzero, holds the length of data.orig.data.
+ * Set data.capacity to its correct size, and create data.initial as all
+ * blanks, based on that size. Then encode the original string into
+ * data.initial, preserving any trailing blanks.
*/
-static void
-value_encoding_check( const YYLTYPE& loc, cbl_field_t *field ) {
- if( ! field->internalize() ) {
- error_msg(loc, "inconsistent string literal encoding for '%s'",
- field->data.initial);
+void
+cbl_field_t::set_initial( size_t nchar, const cbl_loc_t& loc ) {
+ auto srclen = data.capacity();
+ set_capacity(nchar);
+ blank_initial( char_capacity() );
+ if( data.original() ) {
+ attr |= cbl_figconst_of(data.original());
+ if( has_attr(hex_encoded_e) ) {
+ // If initial value is too long, the caller should report it.
+ auto len = std::min(srclen, data.capacity());
+ std::copy(data.original(), data.original() + len,
+ const_cast<char*>(data.initial));
+ } else {
+ if( 0 < data.capacity() ) {
+ encode(srclen, loc);
+ }
+ }
}
}
+/*
+ * When called without a length, set_initial determines the character count
+ * from the current size, established by the size of the VALUE string or
+ * literal.
+ */
+void
+cbl_field_t::set_initial( const cbl_loc_t& loc ) {
+ set_initial( data.capacity(), loc );
+}
+
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
static const uint32_t level = 0;
cbl_field_t *f, field = { type, 0, cbl_field_data_t(), level, name, yylineno };
field.parent = parent;
-
+
f = field_add(loc, &field);
assert(f);
return f;
if( is_index ) {
if( tgt.field->type != FldIndex && src.field->type != FldIndex) {
- error_msg(src.loc, "invalid SET %s (%s) TO %s (%s): not a field index",
- tgt.field->name, cbl_field_type_str(tgt.field->type),
- src.field->name, cbl_field_type_str(src.field->type));
+ error_msg(src.loc, "invalid SET %qs (%s) TO %qs (%s): not a field index",
+ name_of(tgt.field), 3 + cbl_field_type_str(tgt.field->type),
+ name_of(src.field), 3 + cbl_field_type_str(src.field->type));
delete tgt_list;
return false;
}
if( src.field->type == FldPointer &&
tgt.field->type == FldPointer ) {
dialect_ok(src.loc, MfMovePointer, "MOVE POINTER");
- }
- if( ! is_index ) {
- char ach[16];
- char stype[32];
- char dtype[32];
- strcpy(stype, cbl_field_type_str(src.field->type));
- strcpy(dtype, cbl_field_type_str(tgt.field->type));
-
- if( src.field->attr & all_alpha_e )
- {
- strcpy(stype, "FldAlphabetic");
- }
- if( tgt.field->attr & all_alpha_e )
- {
- strcpy(dtype, "FldAlphabetic");
- }
- if( !(src.field->attr & scaled_e) && src.field->data.rdigits )
- {
- sprintf(ach, ".%d", src.field->data.rdigits);
- strcat(stype, ach);
- }
- if( !(tgt.field->attr & scaled_e) && tgt.field->data.rdigits )
- {
- sprintf(ach, ".%d", tgt.field->data.rdigits);
- strcat(dtype, ach);
- }
- error_msg(src.loc, "cannot MOVE '%s' (%s) to '%s' (%s)",
- name_of(src.field), stype,
- name_of(tgt.field), dtype);
- delete tgt_list;
- return false;
+ } else {
+ error_msg(src.loc, "cannot MOVE %qs (%s) TO %qs (%s)",
+ nice_name_of(src.field), 3 + cbl_field_type_str(src.field->type),
+ nice_name_of(tgt.field), 3 + cbl_field_type_str(tgt.field->type));
}
}
}
if( (nsymbol = symbols_update(nsymbol, nparse_error == 0)) > 0 ) {
if( ! literally_one ) {
// Use strdup so cbl_field_t::internalize can free them if need be.
- literally_one = new_literal(xstrdup("1"));
- literally_zero = new_literal(xstrdup("0"));
+ literally_one = new_constant(xstrdup("1"));
+ literally_zero = new_constant(xstrdup("0"));
}
}
break;
}
// cppcheck-suppress [unreadVariable] obviously not true
- tree = parent_of(tree);
+ tree = parent_of(tree);
}
return retval;
}
apply_cdf_turn(exception_turn);
}
exception_turns.clear();
-
+
// Start the Procedure Division.
size_t narg = ffi_args? ffi_args->elems.size() : 0;
std::vector <cbl_ffi_arg_t> args(narg);
auto file = cbl_file_of(symbol_at(file_section_fd));
auto record_area = cbl_field_of(symbol_at(file->default_record));
- record_area->data.capacity = std::max(record_area->data.capacity,
- field->data.capacity);
+ record_area->data.capacity( std::max(record_area->data.capacity(),
+ field->data.capacity()) );
field->file = file_section_fd;
const auto redefined = symbol_redefines(record_area);
void internal_ebcdic_lock();
void internal_ebcdic_unlock();
+static cbl_field_type_t
+field_binary_usage( YYLTYPE loc, cbl_field_t *field,
+ cbl_field_type_t type, uint32_t capacity,
+ bool signable );
+
void
ast_end_program(const char name[] ) {
std::for_each( symbols_begin(), symbols_end(),
return true;
}
-// false after USE statement, to enter Declarative with EC intact.
+// false after USE statement, to enter Declarative with EC intact.
static bool statement_cleanup = true;
static YYLTYPE current_location;
DOTSEP [.]+[[:space:]]
DOTEOL [[:blank:]]*[.]{BLANK_EOL}
+ /*
+ * "If the symbol ',' or the symbol '.' is the last symbol of
+ * character-string-1, the PICTURE clause shall be the last clause
+ * of the data description entry and shall be followed immediately
+ * (without an intervening separator space) by the separator period."
+ */
+NONPIC [,]{DOTSEP}
+
SKIP [[:blank:]]*SKIP[123][[:blank:]]*[.]?{BLANK_EOL}
TITLE [[:blank:]]*TITLE($|[.]|[^\n]*)
BELL { return BELL; }
BINARY-ENCODING { return BINARY_ENCODING; }
BLINK { return BLINK; }
+BYTE { return BYTE; }
CAPACITY { return CAPACITY; }
CENTER {
INDEX { return INDEX; }
MESSAGE-TAG { not_implemented("USAGE type: MESSAGE-TAG"); }
- NATIONAL { not_implemented("USAGE type: NATIONAL");
- return NATIONAL; }
+ NATIONAL { return NATIONAL; }
OBJECT{SPC}REFERENCE { not_implemented("USAGE type: OBJECT REFERENCE"); }
PACKED-DECIMAL { return PACKED_DECIMAL; }
LEADING { return LEADING; }
LEFT { return LEFT; }
MODE { return MODE; }
+ NO { return NO; }
OCCURS/{SPC}{NAME} { return OCCURS; }
OCCURS { yy_push_state(integer_count); return OCCURS; }
OF { return OF; }
VARYING { return VARYING; }
VOLATILE { return VOLATILE; }
WHEN { return WHEN; }
+ WITH { return WITH; }
COPY {
yy_push_state(copy_state);
yylval.string = xstrdup(yytext); return picset(ALNUM); }
{ALNUM}/[(]{NAME}[)] { yy_push_state(picture_count);
yylval.string = xstrdup(yytext); return picset(ALNUM); }
- {ALNUM} { yylval.string = xstrdup(yytext); return picset(ALNUM); }
+ {ALNUM}/{NONPIC}? { yylval.string = xstrdup(yytext); return picset(ALNUM); }
{ALPHED} { yylval.string = xstrdup(yytext); return picset(ALPHED); }
{NUMEDITED} { yylval.string = xstrdup(yytext); return picset(NUMED); }
BIT-OF/{NONWORD} { pop_return BIT_OF; }
BIT-TO-CHAR/{NONWORD} { pop_return BIT_TO_CHAR; }
BOOLEAN-OF-INTEGER/{NONWORD} { pop_return BOOLEAN_OF_INTEGER; }
+ BYTE/{NONWORD} { pop_return BYTE; }
BYTE-LENGTH/{NONWORD} { pop_return BYTE_LENGTH; }
CHAR-NATIONAL/{NONWORD} { pop_return CHAR_NATIONAL; }
CHAR/{NONWORD} { pop_return CHAR; }
static int
numstr_of( const char string[], radix_t radix = decimal_e ) {
+ yylval.numstr.is_float = false;
yylval.numstr.radix = radix;
ydflval.string = yylval.numstr.string = xstrdup(string);
char *comma = strchr(yylval.numstr.string, ',');
return NO_CONDITION;
}
}
+ yylval.numstr.is_float = true;
}
if( 1 < std::count(input, eoinput, symbol_decimal_point()) ) {
error_msg(yylloc, "invalid numeric literal %qs", ++p);
{
auto f = cbl_field_of(e);
if( is_constant(f) ) {
- if( f->data.initial ) {
- int token = cbl_figconst_tok(f->data.initial);
+ if( f->data.original() ) {
+ int token = cbl_figconst_tok(f->data.original());
if( token ) return token;
}
- int token = datetime_format_of(f->data.initial);
+ int token = datetime_format_of(f->data.original());
if( token ) {
- yylval.string = xstrdup(f->data.initial);
+ yylval.string = xstrdup(f->data.original());
return token;
}
}
if( type == FldLiteralN ) {
yylval.numstr.radix =
f->has_attr(hex_encoded_e)? hexadecimal_e : decimal_e;
- yylval.numstr.string = xstrdup(f->data.initial);
+ yylval.numstr.string = xstrdup(f->data.original());
return NUMSTR;
}
if( !f->has_attr(record_key_e) ) { // not a key-name literal
fprintf(stderr, "%s", (b)->name); \
if( (b)->type == FldLiteralA || (b)->type == FldLiteralN ) \
{ \
- fprintf(stderr, " \"%s\"", (b)->data.initial); \
+ fprintf(stderr, " \"%s\"", (b)->data.original()); \
} \
else \
{ \
size_t nbytes; \
const char *literal = __gg__iconverter((b).field->codeset.encoding, \
DEFAULT_SOURCE_ENCODING, \
- (b).field->data.initial, \
- strlen((b).field->data.initial), \
+ (b).field->data.original(), \
+ strlen((b).field->data.original()), \
&nbytes); \
fprintf(stderr, " \"%s\"", literal); \
} \
else if( b->type == FldLiteralN ) \
{ \
gg_fprintf(trace_handle, 1, " attr 0x%lx", build_int_cst_type(SIZE_T, b->attr)); \
- gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, b->data.capacity)); \
+ gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, b->data.capacity())); \
gg_fprintf(trace_handle, 1, ":%ld", build_int_cst_type(SIZE_T, b->offset)); \
gg_fprintf(trace_handle, 1, ":%d", build_int_cst_type(INT, b->data.digits)); \
gg_fprintf(trace_handle, 1, ":%d", build_int_cst_type(INT, b->data.rdigits)); \
else if( (b).field->type == FldLiteralN ) \
{ \
gg_fprintf(trace_handle, 1, " attr 0x%lx", build_int_cst_type(SIZE_T, (b).field->attr)); \
- gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, (b).field->data.capacity)); \
+ gg_fprintf(trace_handle, 1, " c:o:d:r %ld", build_int_cst_type(SIZE_T, (b).field->data.capacity())); \
gg_fprintf(trace_handle, 1, ":%ld", build_int_cst_type(SIZE_T, (b).field->offset)); \
gg_fprintf(trace_handle, 1, ":%d", build_int_cst_type(INT, (b).field->data.digits)); \
gg_fprintf(trace_handle, 1, ":%d)", build_int_cst_type(INT, (b).field->data.rdigits)); \
// When doing FILE I/O, you need the cblc_file_t structure
/*
-typedef struct cblc_file_t
+typedef struct cblc_file_t*
{
char *name; // This is the name of the structure; might be the name of an environment variable
- size_t symbol_index; // The symbol table index of the related cbl_file_t structure
+ uint64_t symbol_index; // The symbol table index of the related cbl_file_t structure
char *filename; // The name of the file to be opened
FILE *file_pointer; // The FILE *pointer
cblc_field_t *default_record; // The record_area
- size_t record_area_min; // The size of the smallest 01 record in the FD
- size_t record_area_max; // The size of the largest 01 record in the FD
+ size_t record_area_min; // The size of the smallest 01 record in the FD, in characters
+ size_t record_area_max; // The size of the largest 01 record in the FD, in characters
cblc_field_t **keys; // For relative and indexed files. The first is the primary key. Null-terminated.
int *key_numbers; // One per key -- each key has a number. This table is key_number + 1
int *uniques; // One per key
int errnum; // most recent errno; can't reuse "errno" as the name
file_status_t io_status; // See 2014 standard, section 9.1.12
int padding; // Actually a char
- int delimiter; // ends a record; defaults to '\n'.
+ cbl_char_t delimiter; // ends a record; defaults to '\n'.
+ int stride(); // width of a character
int flags; // cblc_file_flags_t
int recent_char; // This is the most recent char sent to the file
int recent_key;
retval = gg_get_filelevel_struct_type_decl( "cblc_file_t",
33,
CHAR_P, "name",
- SIZE_T, "symbol_table_index",
+ ULONGLONG, "symbol_table_index",
CHAR_P, "filename",
FILE_P, "file_pointer",
cblc_field_p_type_node, "default_record",
INT, "errnum",
INT, "io_status",
INT, "padding",
- INT, "delimiter",
+ UINT, "delimiter",
+ INT, "stride",
INT, "flags",
- INT, "recent_char",
+ UINT, "recent_char",
INT, "recent_key",
INT, "prior_op",
INT, "encoding", // Actually cbl_encoding_t
if( 0 != ftruncate(symbols.fd, len) ) {
cbl_err( "%s:%d: could not extend symbol table to %lu elements",
- __func__, __LINE__, gb4(symbols.capacity));
+ __func__, __LINE__, gb4(symbols.capacity));
}
/*
error_msg(loc, __VA_ARGS__); \
} while(0)
+#define WARNING_FIELD(F, ...) \
+ do{ \
+ auto loc = symbol_field_location(field_index(F)); \
+ warn_msg(loc, __VA_ARGS__); \
+ } while(0)
static const struct cbl_occurs_t nonarray = cbl_occurs_t();
public:
group_size_t() : size(0) {}
group_size_t& operator+( const cbl_field_t& field ) {
- size += field.data.capacity;
+ size += field.data.capacity();
return *this;
}
size_t capacity() const { return size; }
};
-enum { constq = constant_e | quoted_e };
+#define constq (constant_e | quoted_e)
static symbol_elem_t
elementize( const cbl_field_t& field ) {
symbol_elem_t sym (SymField);
sym.elem.field = field;
- // Dubner did the following because he didn't feel like creating yet another
- // cbl_field_t constructor that included the hardcoded encoding for the
- // global special registers.
- sym.elem.field.codeset.set();
return sym;
}
static uint32_t
field_size( const struct cbl_field_t *field ) {
size_t n = field->occurs.ntimes();
- return field->data.capacity * (n > 0? n : 1);
+ return field->data.capacity() * (n > 0? n : 1);
}
const char *
uint64_t
cbl_field_t::set_attr( cbl_field_attr_t attr ) {
- if( attr == signable_e ) {
- if( ! has_attr(attr) && this->var_decl_node != NULL ) {
- parser_field_attr_set(this, attr);
- }
- }
return this->attr |= uint64_t(attr);
}
uint64_t
cbl_field_t::clear_attr( cbl_field_attr_t attr ) {
- if( attr == signable_e ) {
- if( this->var_decl_node != nullptr && has_attr(attr) ) {
- parser_field_attr_set(this, attr, false);
- }
- }
return this->attr &= ~uint64_t(attr);
}
static uint32_t
field_memsize( const struct cbl_field_t *field ) {
uint32_t n = field->occurs.ntimes();
- n = field->data.capacity * (n > 0? n : 1);
+ n = field->data.capacity() * (n > 0? n : 1);
return std::max(n, field->data.memsize);
}
static inline bool
is_index( const cbl_field_type_t type ) { return type == FldIndex; }
+static inline const char *
+cbl_encoding_str( cbl_encoding_t encoding ) {
+ return __gg__encoding_iconv_name(encoding);
+}
+
static size_t
symbols_dump( size_t first, bool header ) {
size_t ninvalid = 0;
}
for( struct symbol_elem_t *e = symbols_begin(first); e < symbols_end(); e++ ) {
- char *s;
+ char *s = nullptr;
switch(e->type) {
case SymFilename:
free(part);
}
break;
- default:
+ case SymLocale:
+ s = xasprintf("%4" GCC_PRISZ "u %-18s %s %s collation %s", (fmt_size_t)e->program,
+ "Locale",
+ e->elem.locale.name,
+ cbl_encoding_str(e->elem.locale.encoding),
+ e->elem.locale.collation );
+ break;
+ }
+ if( ! s ) {
dbgmsg("%s: cannot dump symbol type %d", __func__, e->type);
continue;
}
if( is_elementary(group->type) ) { // "group" is in fact just a field
if( is_record_area(group) ) {
- if( group->data.capacity == 0 ) {
+ if( group->data.capacity() == 0 ) {
const auto& file = *cbl_file_of(symbol_at(group->file));
- group->data.capacity = file.varying_size.max;
+ group->data.capacity( file.varying_size.max );
}
// Find 01s for the file that is not a record area field.
assert(record->level == 1);
e = calculate_capacity(p);
auto record_size = std::max(record->data.memsize,
- record->data.capacity);
- group->data.capacity = std::max(group->data.capacity, record_size);
+ record->data.capacity());
+ group->data.capacity( std::max(group->data.capacity(), record_size) );
}
}
// Reach back to that symbol to set its capacity, if need be.
auto area = symbol_redefines(group);
if( area ) {
- area->data.capacity = std::max(area->data.capacity,
- group->data.capacity);
+ area->data.capacity( std::max(area->data.capacity(),
+ group->data.capacity()) );
}
return e; // no 01, return self
if( redefined ) {
redefined->data.memsize = std::max(field_memsize(redefined), field_size(group));
- if( redefined->data.memsize == redefined->data.capacity ) {
+ if( redefined->data.memsize == redefined->data.capacity() ) {
redefined->data.memsize = 0;
}
}
assert(group->type == FldGroup);
- group->data.capacity = 0;
+ group->data.capacity(0);
std::list<cbl_field_t*> members;
field->data.memsize = 0;
- if( redefined->data.memsize == redefined->data.capacity ) {
+ if( redefined->data.memsize == redefined->data.capacity() ) {
redefined->data.memsize = 0;
}
continue;
}
- group->data.capacity += field_size(field);
+ group->data.add_capacity( field_size(field) );
group->data.memsize += field_memsize(field);
// If group has a parent that is a record area, expand it, too.
if( 0 < group->parent ) {
redefined = symbol_redefines(group);
if( redefined && is_record_area(redefined) ) {
- if( redefined->data.capacity < group->data.memsize ) {
- redefined->data.capacity = group->data.memsize;
+ if( redefined->data.capacity() < group->data.memsize ) {
+ redefined->data.capacity( group->data.memsize );
}
}
}
}
group->data.memsize = std::max(max_memsize, group->data.memsize);
- if( group->data.memsize == group->data.capacity ) group->data.memsize = 0;
+ if( group->data.memsize == group->data.capacity() ) group->data.memsize = 0;
- if( 0 < group->data.memsize && group->data.memsize < group->data.capacity ) {
+ if( 0 < group->data.memsize && group->data.memsize < group->data.capacity() ) {
if( yydebug ) {
dbgmsg( "%s:%d: small capacity?\n\t%s", __func__, __LINE__, field_str(group) );
}
- group->data.memsize = group->data.capacity;
+ group->data.memsize = group->data.capacity();
}
- if( group->data.capacity == 0 ) {
+ if( group->data.capacity() == 0 ) {
dbgmsg( "%s:%d: zero capacity?\n\t%s", __func__, __LINE__, field_str(group) );
}
// RENAMES may be included in end_of_group.
size_t isym = field_index(field), esym = end_of_group(isym);
bool odo = std::any_of( symbol_at(isym) + 1, symbol_at_impl(esym),
- [field]( const auto& elem ) {
- if( elem.type == SymField ) {
- auto f = cbl_field_of(&elem);
- if( field->level < f->level ) { // exclude RENAMES
- return 0 < f->occurs.depending_on;
- }
- }
- return false;
- } );
+ [field]( const auto& elem ) {
+ if( elem.type == SymField ) {
+ auto f = cbl_field_of(&elem);
+ if( field->level < f->level ) { // exclude RENAMES
+ return 0 < f->occurs.depending_on;
+ }
+ }
+ return false;
+ } );
return odo;
}
if( 'r' == parredef && field->level == 0 ) parredef = 'p';
if( field->has_attr(typedef_e) ) parredef = 'T';
- const char *init = field->data.initial? field->data.initial : NULL;
+ const char *init = field->data.original();
if( init ) {
auto fig = cbl_figconst_of(init);
if( normal_value_e != fig ) {
init = cbl_figconst_str(fig);
} else {
-#if 0
- // At this point, we might have to convert 'init' back to ASCII
- char *false_init = static_cast<char *>(xmalloc(field->init.capacity+1));
- memcpy(false_init, field->init.initial, field->data.capacity);
- false_data[field->data.capacity] = '\0';
- size_t charsout;
-
- cbl_encoding_t enc_from = field->codeset.encoding;
- if( field->type == FldNumericDisplay )
- {
- // Apparently we need to trace back the meaning of data.literal for
- // field::type == FldNumericDisplay
- enc_from = DEFAULT_SOURCE_ENCODING;
- }
-
- init = __gg__iconverter(enc_from,
- DEFAULT_SOURCE_ENCODING,
- false_data,
- field->data.capacity,
- &charsout);
-#endif
- auto eoinit = init + strlen(init);
char *s = xasprintf("'%s'", init);
// No NUL within the initial data.
+ auto eoinit = init + strlen(init);
auto ok = std::none_of( init, eoinit,
[]( char ch ) { return ch == '\0'; } );
assert(ok);
// If any of the init are unprintable, provide a hex version.
if( ! std::all_of(init, eoinit, fisprint) ) {
if( is_elementary(field->type) && field->type != FldPointer ) {
- const size_t len = strlen(s) + 8 + 2 * field->data.capacity;
+ const size_t len = strlen(s) + 8 + 2 * strlen(init);
s = reinterpret_cast<char*>(xrealloc(s, len));
strcat( s, " (0x" );
char *p = s + strlen(s);
(field->attr & external_e)? 'E' : 0x20,
storage_type,
field->data.memsize,
- field->data.capacity, field->data.digits, field->data.rdigits,
+ field->data.capacity(), field->data.digits, field->data.rdigits,
init, field->attr_str(attrs), field->line );
return string;
}
} else {
++e2;
}
- alias->data.capacity = std::for_each(e, e2, cap).capacity;
- assert(alias->data.capacity > 0);
+ cap = std::for_each(e, e2, cap);
+ alias->data.capacity( cap.capacity );
+ assert(alias->data.capacity() > 0);
}
bool
static std::ostream&
operator<<( std::ostream& os, const cbl_field_data_t& field ) {
return os << field.memsize << ','
- << field.capacity << ','
+ << field.capacity() << ','
<< field.digits << ','
<< field.rdigits << ','
<< (field.picture? field.picture : "");
field->line, field->level_str(), field->name);
} else {
- dbgmsg("%s: error: data item %s #" HOST_SIZE_T_PRINT_UNSIGNED
+ cbl_internal_error("%s: data item %s #" HOST_SIZE_T_PRINT_UNSIGNED
" '%s' capacity %u rejected",
__func__,
3 + cbl_field_type_str(field->type),
- (fmt_size_t)isym, field->name, field->data.capacity);
- gcc_unreachable();
+ (fmt_size_t)isym, field->name, field->data.capacity());
}
}
return 0;
// A shared record area has no 01 child because that child redefines its parent.
for( auto sharer : shared_record_areas ) {
auto redefined = cbl_field_of(symbol_at(sharer->parent));
- sharer->data.capacity = redefined->data.capacity;
+ sharer->data.capacity( redefined->data.capacity() );
}
for( p = symbols_begin(first); p < symbols_end(); p++ ) {
if( field->level != 0 && field->has_attr(constant_e) ) {
auto fig = cbl_figconst_field_of(field->data.initial);
if( fig ) {
- field->var_decl_node = fig->var_decl_node;
- continue;
+ field->var_decl_node = fig->var_decl_node;
+ continue;
}
}
continue;
}
if( is_numeric(field) && ! field->has_attr(constant_e) ) {
- if( field->data.capacity == 0 ) {
+ if( field->data.capacity() == 0 ) {
ERROR_FIELD(field, "numeric %qs has USAGE that requires PICTURE %s",
field->name, field->data.initial);
}
gcc_unreachable();
case FldAlphaEdited:
case FldAlphanumeric:
- case FldClass:
case FldDisplay:
case FldGroup:
case FldLiteralA:
"internal: %qs encoding not defined", field->name);
}
break;
+ case FldClass:
case FldConditional:
case FldFloat:
case FldIndex:
if( ! prior->codeset.set() ) { // needs attention
dbgmsg("'%s' is already National", prior->name);
}
- field->attr |= numeric_group_attrs(prior);
+ field->attr |= numeric_group_attrs(field);
}
// verify level 88 domain value
if( is_numeric(prior) && field->level == 88 ) {
*/
void
symbol_table_init(void) {
+ const static cbl_field_t::codeset_t cp1252(CP1252_e);
assert(symbols.fd == -1);
assert(symbols.nelem == 0);
// These should match the definitions in libgcobol/constants.cc
static cbl_field_t constants[] = {
- { FldAlphanumeric, space_value_e | int(constq) | register_e,
- {1,1,0,0, " \0\xFF"}, 0, "SPACE" },
- { FldAlphanumeric, space_value_e | int(constq) | register_e,
- {1,1,0,0, " \0\xFF"}, 0, "SPACES" },
- { FldAlphanumeric, low_value_e | int(constq) | register_e,
- {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES" },
- { FldAlphanumeric, zero_value_e | int(constq) | register_e,
- {1,1,0,0, "0"}, 0, "ZEROS" },
- { FldAlphanumeric, high_value_e | int(constq) | register_e,
- {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES" },
+ { FldAlphanumeric, SPACE_VALUE_E | constq | register_e,
+ {1,1,0,0, " \0\xFF"}, 0, "SPACE", cp1252 },
+ { FldAlphanumeric, SPACE_VALUE_E | constq | register_e,
+ {1,1,0,0, " \0\xFF"}, 0, "SPACES", cp1252 },
+ { FldAlphanumeric, LOW_VALUE_E | constq | register_e,
+ {1,1,0,0, "L\0\xFF"}, 0, "LOW_VALUES", cp1252 },
+ { FldAlphanumeric, ZERO_VALUE_E | constq | register_e,
+ {1,1,0,0, "0"}, 0, "ZEROS", cp1252 },
+ { FldAlphanumeric, HIGH_VALUE_E | constq | register_e,
+ {1,1,0,0, "H\0\xFF"}, 0, "HIGH_VALUES", cp1252 },
// IBM standard: QUOTE is a double-quote unless APOST compiler option
- { FldAlphanumeric, quote_value_e | int(constq) | register_e ,
- {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES" },
- { FldPointer, int(constq) | register_e ,
- {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS" },
+ { FldAlphanumeric, QUOTE_VALUE_E | constq | register_e ,
+ {1,1,0,0, "\"\0\xFF"}, 0, "QUOTES", cp1252 },
+ { FldPointer, constq | register_e ,
+ {8,8,0,0, zeroes_for_null_pointer}, 0, "NULLS", cp1252 },
// IBM defines TALLY
// 01 TALLY GLOBAL PICTURE 9(5) USAGE BINARY VALUE ZERO.
{ FldNumericBin5, signable_e | register_e,
- {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY" },
+ {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_TALLY", cp1252 },
// 01 ARGI is the current index into the argv array
{ FldNumericBin5, signable_e | register_e,
- {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI" },
+ {16, 16, MAX_FIXED_POINT_DIGITS, 0, NULL}, 0, "_ARGI", cp1252 },
// These last two don't require actual storage; they get BOOL var_decl_node
// in parser_symbol_add()
- { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE" },
- { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE" },
+ { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_TRUE", cp1252 },
+ { FldConditional, constant_e | register_e , {1,1,0,0, ""}, 0, "_VERY_FALSE", cp1252 },
};
for( struct cbl_field_t *f = constants;
f < constants + COUNT_OF(constants); f++ ) {
static cbl_field_t debug_registers[] = {
{ FldGroup, register_e,
- {132,132,0,0, NULL}, 1, "DEBUG-ITEM" },
+ {132,132,0,0, NULL}, 1, "DEBUG-ITEM", cp1252 },
{ FldAlphanumeric, register_e,
- {6,6,0,0, " "}, 2, "DEBUG-LINE" },
+ {6,6,0,0, " "}, 2, "DEBUG-LINE", cp1252 },
{ FldAlphanumeric, register_e|filler_e,
- {1,1,0,0, " "}, 2, "FILLER" },
+ {1,1,0,0, " "}, 2, "FILLER", cp1252 },
{ FldAlphanumeric, register_e,
- {30,30,0,0, NULL}, 2, "DEBUG-NAME" },
+ {30,30,0,0, NULL}, 2, "DEBUG-NAME", cp1252 },
{ FldAlphanumeric, register_e|filler_e,
- {1,1,0,0, " "}, 2, "FILLER" },
+ {1,1,0,0, " "}, 2, "FILLER", cp1252 },
{ FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
- {5,5,4,0, NULL}, 2, "DEBUG-SUB-1" },
+ {5,5,4,0, NULL}, 2, "DEBUG-SUB-1", cp1252 },
{ FldAlphanumeric, register_e|filler_e,
- {1,1,0,0, " "}, 2, "FILLER" },
+ {1,1,0,0, " "}, 2, "FILLER", cp1252 },
{ FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
- {5,5,4,0, NULL}, 2, "DEBUG-SUB-2" },
+ {5,5,4,0, NULL}, 2, "DEBUG-SUB-2", cp1252 },
{ FldAlphanumeric, register_e|filler_e,
- {1,1,0,0, " "}, 2, "FILLER" },
+ {1,1,0,0, " "}, 2, "FILLER", cp1252 },
{ FldNumericDisplay, signable_e | register_e | leading_e | separate_e,
- {5,5,4,0, NULL}, 2, "DEBUG-SUB-3" },
+ {5,5,4,0, NULL}, 2, "DEBUG-SUB-3", cp1252 },
{ FldAlphanumeric, register_e | filler_e,
- {1,1,0,0, " "}, 2, "FILLER" },
+ {1,1,0,0, " "}, 2, "FILLER", cp1252 },
{ FldAlphanumeric, signable_e | register_e,
- {76,76,0,0, NULL}, 2, "DEBUG-CONTENTS" },
+ {76,76,0,0, NULL}, 2, "DEBUG-CONTENTS", cp1252 },
};
// debug registers
std::accumulate(debug_registers,
debug_registers + COUNT_OF(debug_registers), group_size_t());
debug_registers[0].data.memsize =
- debug_registers[0].data.capacity = group_size.capacity();
+ debug_registers[0].data.capacity( group_size.capacity() );
auto debug_start = p = table.elems + table.nelem;
p = std::transform(debug_registers,
// special registers
static cbl_field_t special_registers[] = {
- { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS" },
- { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0" },
- { FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE" },
- { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER" },
- { FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin" },
- { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout" },
- { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr" },
- { FldLiteralA, int(constq)|register_e, {0,0,0,0, "/dev/null"}, 0, "_dev_null" },
+ { FldNumericDisplay, register_e, {2,2,2,0, NULL}, 0, "_FILE_STATUS", cp1252 },
+ { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "UPSI-0", cp1252 },
+ { FldNumericBin5, signable_e|register_e, {2,2,4,0, NULL}, 0, "RETURN-CODE", cp1252 },
+ { FldNumericBin5, register_e, {2,2,4,0, NULL}, 0, "LINAGE-COUNTER", cp1252 },
+ { FldLiteralA, register_e, {0,0,0,0, "/dev/stdin"}, 0, "_dev_stdin", cp1252 },
+ { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stdout"}, 0, "_dev_stdout", cp1252 },
+ { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/stderr"}, 0, "_dev_stderr", cp1252 },
+ { FldLiteralA, constq|register_e, {0,0,0,0, "/dev/null"}, 0, "_dev_null", cp1252 },
};
assert(table.nelem + COUNT_OF(special_registers) < table.capacity);
table.nelem = p - table.elems;
assert(table.nelem < table.capacity);
+ const static auto reg_based_any = cbl_field_attr_t(register_e | based_e | any_length_e);
// xml registers
static cbl_field_t xml_registers[] = {
- { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-CODE" },
- { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT" },
- { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION" },
- { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE" },
- { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE" },
- { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX" },
- { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX" },
- { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-TEXT" },
- { FldAlphanumeric, register_e | based_e | any_length_e, {1,1,0,0, nullptr}, 1, "XML-NTEXT" },
+ { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-CODE", cp1252 },
+ { FldAlphanumeric, register_e, {30,30,0,0, " "}, 1, "XML-EVENT", cp1252 },
+ { FldNumericBin5, register_e, {4,4,9,0, "0"}, 1, "XML-INFORMATION", cp1252 },
+ { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE", cp1252 },
+ { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE", cp1252 },
+ { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NAMESPACE-PREFIX", cp1252 },
+ { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NNAMESPACE-PREFIX", cp1252 },
+ { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-TEXT", cp1252 },
+ { FldAlphanumeric, reg_based_any, {1,1,0,0, nullptr}, 1, "XML-NTEXT", cp1252 },
}, * const eoxml = xml_registers + COUNT_OF(xml_registers);
assert(table.nelem + COUNT_OF(xml_registers) < table.capacity);
table.nelem = p - table.elems;
assert(table.nelem < table.capacity);
- // Add any CDF values already defined as literals.
- // After symbols are ready, the CDF adds them directly.
+ // Add any CDF values defined on the command line.
+ // After symbols are ready, the CDF adds them directly.
const std::list<cbl_field_t> cdf_values = cdf_literalize();
+ auto icdf = table.nelem;
table.nelem += cdf_values.size();
assert(table.nelem < table.capacity);
-
+ auto ecdf = table.nelem;
+
p = std::transform(cdf_values.begin(), cdf_values.end(), p, elementize);
-
+
// Initialize symbol table.
symbols = table;
+ // Encode CDF literals. Could not be done previously because encoding checks
+ // against figurative constants in the symbol table.
+ for( auto i=icdf; i < ecdf; i++ ) {
+ auto& f = symbols.elems[i].elem.field;
+ if( f.type == FldLiteralA ) {
+ f.set_initial(cbl_loc_t());
+ }
+ }
+
for( auto e = symbols.elems; e < symbols.elems + symbols.nelem; e++ ) {
if( e->type == SymField ) {
update_symbol_map2(e);
static const uint64_t inherit = signable_e | leading_e | separate_e | big_endian_e;
static_assert(sizeof(cbl_field_t::type) < sizeof(inherit), "need bigger type");
assert(field);
- if( field->type == FldNumericDisplay || field->type == FldGroup ) {
- if( field->parent > 0 && symbol_at(field->parent)->type == SymField ) {
- cbl_field_t *parent = parent_of(field);
- assert(parent);
- return inherit & parent->attr;
- }
+ if( field->parent > 0 && symbol_at(field->parent)->type == SymField ) {
+ cbl_field_t *parent = parent_of(field);
+ assert(parent);
+ return inherit & parent->attr;
}
return 0;
}
field->codeset = parent->codeset;
}
// BINARY-LONG, for example, sets capacity.
- if( is_numeric(parent->usage) && parent->data.capacity > 0 ) {
+ if( is_numeric(parent->usage) && parent->data.capacity() > 0 ) {
field->type = parent->usage;
field->data = parent->data;
field->data = 0; // cppcheck-suppress redundantAssignment
symbol_field_alias( struct symbol_elem_t *e, const char name[] )
{
cbl_field_t alias = *cbl_field_of(e);
- cbl_field_data_t data = { alias.data.memsize, alias.data.capacity };
+ cbl_field_data_t data = { alias.data.memsize, alias.data.capacity() };
alias.data = data;
alias.data.memsize = 0;
static bool fd_record_size_cmp( const symbol_elem_t& a, const symbol_elem_t& b ) {
- return cbl_field_of(&a)->data.capacity < cbl_field_of(&b)->data.capacity;
+ return cbl_field_of(&a)->data.capacity() < cbl_field_of(&b)->data.capacity();
}
/*
// Make a copy, update the sizes, and return it.
cbl_file_t::varying_t output = file->varying_size;
- output.min = cbl_field_of(&*p.first)->data.capacity;
- output.max = cbl_field_of(&*p.second)->data.capacity;
+ output.min = cbl_field_of(&*p.first)->data.capacity();
+ output.max = cbl_field_of(&*p.second)->data.capacity();
assert(output.min > 0 && "min record size is 0");
assert(output.min <= output.max);
__gg__encoding_iconv_name(current_encoding(display_encoding_e));
iconv_t cd = iconv_open(tocode, fromcode);
+ const charmap_t *charmap_disp =
+ __gg__get_charmap(current_encoding(display_encoding_e));
+ size_t stride = charmap_disp->stride();
+
#if optimal_reencode
if( fromcode == tocode ) { // semantically
tgt.resize(0);
}
assert(outbytesleft < sizeof(pos));
n = sizeof(pos) - outbytesleft;
- if( 1 < n ) {
+ if( stride < n ) {
error_msg(loc, "%s character '%c' (%x hex) requires %zu bytes as %s",
fromcode, ch, ch, n, tocode);
continue;
using std::deque;
static deque<cbl_field_t*> stack;
+/*
+ * Allocate a temporary field. Assign the type and name, if supplied. Caller
+ * deals with encoding and initial value.
+ */
static cbl_field_t *
new_temporary_impl( enum cbl_field_type_t type, const cbl_name_t name = nullptr )
{
field = new_temporary_impl(FldLiteralA);
field->attr |= attr;
- if(len == 0)
- {
- // This will cover UTF-32, should that arise.
- size_t nbytes = 4;
- char *init = static_cast<char *>(xmalloc(nbytes));
- memset(init, 0, nbytes);
- field->data.initial = init;
- }
- if(len)
- {
- char *init = static_cast<char *>(xmalloc(len+4));
- memcpy(init, initial, len);
- memset(init+len, 0, 4);
- field->data.initial = init;
- }
- field->data.capacity = len;
+ char *orig = static_cast<char *>(xmalloc(len+4));
+
+ auto p = initial? std::copy(initial, initial + len, orig) : orig;
+ std::fill(p, p+4, 0);
+ field->data.original(orig);
+ field->data.capacity(len); // in case of Z-string
}
+ assert(field->name[0] != '\0'); // new_temporary_impl sets literal names
+
if( ! field->has_attr(hex_encoded_e) ) {
// If the literal bore a prefix, set the encoding,
- if( encoding != cbl_field_t::codeset_t::source_encoding->type ) {
+ if( encoding != cbl_field_t::codeset_t::default_encodings.source->type ) {
field->codeset.set(encoding);
+ } else {
+ field->codeset.set();
}
- field->internalize();
}
- static size_t literal_count = 1;
- sprintf(field->name,
- "%s%c_" HOST_SIZE_T_PRINT_DEC,
- "_literal",
- field->type == FldLiteralA ? 'a' : 'n',
- (fmt_size_t)literal_count++);
+ if( field->type == FldLiteralN ) {
+ field->set_initial(cbl_loc_t());
+ } else {
+ field->set_initial(len);
+ }
return parser_symbol_add2(field);
}
static temporaries_t temporaries;
+void
+symbol_temporary_location( const cbl_field_t *field, const cbl_loc_t& loc ) {
+ temporaries.locs[field] = loc;
+}
+
+cbl_loc_t
+symbol_temporary_location( const cbl_field_t *field ) {
+ extern YYLTYPE yylloc;
+ auto p = temporaries.locs.find(field);
+ return p == temporaries.locs.end()? cbl_loc_t(yylloc) : p->second;
+}
+
cbl_field_t *
temporaries_t::literal( uint32_t len, const char value[],
cbl_field_attr_t attr, cbl_encoding_t encoding ) {
return literals[key] = new_literal_add(value, len, attr, encoding);
}
+cbl_field_t *
+new_literal_2( uint32_t len, const char initial[],
+ cbl_field_attr_t attr, cbl_encoding_t encoding ) {
+ encoding = current_encoding('A');
+ const charmap_t *charmap = __gg__get_charmap(encoding);
+ cbl_field_t *retval = temporaries.literal(len*charmap->stride(),
+ initial,
+ attr,
+ encoding);
+ return retval;
+}
+
+
cbl_field_t *
new_literal( uint32_t len, const char initial[],
cbl_field_attr_t attr, cbl_encoding_t encoding ) {
}
cbl_field_t *
-new_alphanumeric( size_t capacity, const cbl_name_t name = nullptr ) {
+new_alphanumeric( size_t capacity, const cbl_name_t name, cbl_encoding_t encoding ) {
cbl_field_t * field = new_temporary_impl(FldAlphanumeric, name);
- field->data.capacity = capacity;
+ field->set_capacity( capacity );
+ if( encoding != no_encoding_e ) {
+ field->codeset.set(encoding);
+ }
+ //// Dubner hacking away: If name is non-null, then assume this is a
+ //// function definition, and force the codeset, which otherwise will have
+ //// defaulted to current_encoding('A'), and the valid() test in codeset.set
+ //// will have prevented it from being changed.
+ if( name && encoding != no_encoding_e ) {
+ field->codeset.set_explicit(encoding);
+ }
temporaries.add(field);
return parser_symbol_add2(field);
}
extern os_locale_t os_locale;
-const encodings_t cbl_field_t::codeset_t::source_encodings[2] = {
- { false, iconv_UTF_8_e, "UTF-8" },
- { true, iconv_CP1252_e, "CP1252" },
-};
-const encodings_t * cbl_field_t::codeset_t::source_encoding = {
- cbl_field_t::codeset_t::source_encodings
-};
-const encodings_t cbl_field_t::codeset_t::standard_internal = {
- true, iconv_CP1252_e, "CP1252"
-};
-#define standard_internal cbl_field_t::codeset_t::standard_internal
+uint8_t
+cbl_field_t::codeset_t::stride() const {
+ const charmap_t *charmap = __gg__get_charmap(encoding);
+ return charmap->stride();
+}
+
+cbl_field_t::codeset_t::default_encodings_t
+cbl_field_t::codeset_t::default_encodings
+ {
+ encodings_t { true, iconv_CP1252_e, "CP1252" } , // encodings_t alpha & national
+ {
+ encodings_t{ false, iconv_UTF_8_e, "UTF-8" } ,
+ encodings_t { true, iconv_CP1252_e, "CP1252" } }
+ };
+
+bool
+cobol_alpha_encoding( const char name[] ) {
+ auto encoding = __gg__encoding_iconv_descr(name);
+ if( encoding ) {
+ cbl_field_t::codeset_t::default_encodings.alpha = *encoding;
+ const charmap_t *charmap = __gg__get_charmap(encoding->type);
+ if( charmap->is_like_ebcdic() ) {
+ cobol_gcobol_feature_set(feature_internal_ebcdic_e);
+ } else {
+ // This handles multiple occurrences of -fexec-charset on a command line
+ cobol_gcobol_feature_set(feature_internal_ebcdic_e, false);
+ }
+ // Let's handle the ordinary situation of just setting alpha, and make
+ // sure national is at least as wide. (We shouldn't have to set national
+ // first, just so we can set alpha.)
+ auto enc_n = cbl_field_t::codeset_t::default_encodings.national;
+ const charmap_t *charmap_n = __gg__get_charmap(enc_n.type);
+ if( charmap_n->stride() < charmap->stride() ) {
+ // Default national is narrower than *encoding, so boost default national
+ // up:
+ cobol_national_encoding(encoding->name);
+ }
+ return true;
+ }
+ return false;
+}
+
+bool
+cobol_national_encoding( const char name[] ) {
+ auto encoding = __gg__encoding_iconv_descr(name);
+ if( encoding ) {
+ cbl_field_t::codeset_t::default_encodings.national = *encoding;
+ return true;
+ }
+ return false;
+}
+
cbl_field_t *
-new_temporary( enum cbl_field_type_t type, const char *initial, bool is_signed ) {
+new_temporary( enum cbl_field_type_t type, const char *initial, cbl_field_attr_t attr ) {
+ const bool is_signed = attr == signable_e;
const bool force_unsigned = type == FldNumericBin5 && ! is_signed;
if( ! initial && ! force_unsigned ) {
return temporaries.acquire(type, initial);
}
if( is_literal(type) ) {
+ const auto& defaults( cbl_field_t::codeset_t::default_encodings );
auto field = temporaries.literal(strlen(initial), initial,
type == FldLiteralA? quoted_e : none_e,
- standard_internal.type);
+ defaults.source->type);
return field;
}
cbl_field_t *field = new_temporary_impl(type, initial);
return parser_symbol_add2(field);
}
+cbl_field_t *
+new_literal_float( const cbl_loc_t& loc, const char initial[] ) {
+ auto field = new_temporary_impl(FldFloat);
+ field->attr = constant_e;
+ std::string init(initial);
+ std::replace( init.begin(), init.end(), ',', '.' );
+ field->encode_numeric(init.c_str(), loc);
+ field->data.original(initial);
+
+ return parser_symbol_add2(field);
+}
+
/*
* This set of ASCII-like encodings is incomplete and possibly wrong. A
- * complete definition would better supported with a Boolean in enccodings_t.
+ * complete definition would better supported with a Boolean in encodings_t.
* If it returns false pessimistically, the only consequence is inefficiency:
* the string is processed by iconv(3).
*/
bool
cbl_field_t::is_ascii() const {
return std::all_of( data.initial,
- data.initial + data.capacity,
+ data.initial + data.capacity(),
isascii );
}
* never reverts.
*/
-const char *
-cbl_field_t::internalize() {
- /* The purpose of this routine is to return a nul-terminated string which
- is data.initial converted from the source-code characters to the
- codeset.encoding characters.
-
- The contract between this routine and the routines that call it is that
- for alphanumeric types, data.initial shall have the same number of
- characters as will be needed to fill data.capacity.
-
- Be aware that for PIC X(32) Z"foo", there are the characters "foo",
- followed by a NUL, and then 28 spaces to fill it out. It turns out that
- iconv, given a character count of 32, converts all 32, including the
- embedded NUL. So, that case works even through strlen(initial) is
- smaller than the length of initial, which is the same as capacity.
- */
-
- static const char *fromcode = codeset.source_encodings[0].name;
- static const size_t noconv = size_t(-1);
- static std::unordered_map<std::string, iconv_t> tocodes;
-
- if( ! codeset.valid() ) {
- dbgmsg("%s:%d: not converting %s", __func__, __LINE__, data.initial);
- return data.initial;
- }
+size_t
+cbl_field_t::source_code_check(const void *initial, size_t length)
+ {
+ size_t retval;
- const char *tocode = __gg__encoding_iconv_name(codeset.encoding);
+ size_t iconv_retval;
- std::string toname(tocode);
- auto p = tocodes.find(toname);
- if( p == tocodes.end() ) {
- tocodes[toname] = iconv_open(tocode, fromcode);
- }
- iconv_t cd = tocodes[toname];
+ try_again:
- if (cd == (iconv_t)-1) {
- cbl_message(ParIconvE,
- "failed %<iconv_open%> tocode = %qs fromcode = %qs",
- tocode, fromcode);
- }
+ cbl_encoding_t fromcode = codeset.default_encodings.source->type;
+ cbl_encoding_t tocode = codeset.encoding;
+ std::string toname = __gg__encoding_iconv_name(tocode);
- if( fromcode == tocode || has_attr(hex_encoded_e) ) {
- return data.initial;
+ __gg__iconverter(fromcode,
+ tocode,
+ initial,
+ length,
+ &retval,
+ &iconv_retval);
+ if( iconv_retval )
+ {
+ // something went wrong on conversion.
+ if( codeset.default_encodings.next_source_encoding() )
+ {
+ goto try_again;
+ }
+ // None of our valid "from" encodings worked
+ dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__,
+ toname.c_str(), xstrerror(errno));
+ ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno));
+ }
+ return retval;
}
- if( data.capacity == 0 ) {
- assert(0 == strlen(data.initial));
- return data.initial;
- }
- if( holds_ascii() && is_ascii() ) {
- if( type != FldNumericEdited ) {
- if( ! data.initial_within_capacity() ) {
- ERROR_FIELD(this, "%s %s VALUE %qs of %zu exceeds size %u",
- cbl_field_t::level_str(level), name, data.initial,
- strlen(data.initial), data.capacity );
+/*
+ * Look up the encoding handle for a given target based on the current source
+ * encoding.
+ */
+static iconv_t
+iconv_cd( cbl_encoding_t tgt ) {
+ struct iconv_key_t {
+ cbl_encoding_t to, from;
+ iconv_key_t() : to(no_encoding_e), from(no_encoding_e) {}
+ iconv_key_t( cbl_encoding_t to, cbl_encoding_t from ) : to(to), from(from) {}
+ bool operator<( const iconv_key_t& that ) const {
+ if( this->from == that.from ) {
+ return this->to < that.to;
}
+ return this->from < that.from;
+ }
+ };
+ static std::map<iconv_key_t, iconv_t> cds;
+
+ auto src = cbl_field_t::codeset_t::default_encodings.current_source_encoding()->type;
+ assert(src);
+
+ iconv_key_t key(tgt, src);
+ auto p = cds.find(key);
+ iconv_t cd;
+
+ if( p == cds.end() ) {
+ const char *fromcode = __gg__encoding_iconv_name(src);
+ const char *tocode = __gg__encoding_iconv_name(tgt);
+ gcc_assert(fromcode && tocode);
+
+ if( (cd = iconv_open(tocode, fromcode)) == iconv_t(-1) ) {
+ return cd;
}
- return data.initial;
+ cds[key] = cd;
+ } else {
+ cd = p->second;
}
- assert(data.capacity > 0);
- // The final 2 bytes of the output are "!\0". It's a debugging sentinel.
- size_t n;
- size_t inbytesleft = data.capacity;
- size_t outbytesleft = inbytesleft;
- if( !is_literal(this) && inbytesleft < strlen(data.initial) ) {
- inbytesleft = strlen(data.initial);
- }
- if( type == FldNumericEdited ) {
- outbytesleft = inbytesleft;
- }
- const unsigned int in_len = inbytesleft;
+ return cd;
+}
- char *in = const_cast<char*>(data.initial);
- char *out = static_cast<char*>( xcalloc(1, outbytesleft + 2) ), *output = out;
+/*
+ * Precondition: data.nbyte is the length of the output, and encoding is set.
+ * The caller supplies the input length because the string may include embedded
+ * NULs. If conversion succeeds, return NULL. If it fails, return a pointer
+ * to the failed character in data.orig.
+ */
+const char *
+cbl_field_t::encode( size_t srclen, cbl_loc_t loc ) {
+ gcc_assert(data.capacity());
+ gcc_assert(data.initial);
+ gcc_assert(data.original());
+ gcc_assert(codeset.valid());
+
+ if( this->type == FldLiteralN )
+ {
+ // DUBNER made this change; FldLiteralN doesn't get encoded. This probably
+ // should have been nipped in the bud somewhere upstream.
+ return data.original();
+ }
- assert(fromcode != tocode);
+ extern YYLTYPE yylloc;
+ const char *bad_boy = data.original();
+ if( 0 == loc.first_line )
+ loc = level == 0 ? yylloc : symbol_field_location(field_index(this));
/*
- * If we're currently assuming the source code is encoded according to the
- * locale (the default), and there's an iconv failure, try once more using a
- * different assumption, that the source code is encoded as CP1252.
- *
- * This heuristic means that some UTF-8 literals could be converted until a
- * CP1252 byte is encountered. We could be stricter about that.
- *
- * Also possible is a failure to avoid iconv with fromcode and tocode denote
- * the same encoding but with different spellings, e.g. CP1252 and CP1252//.
+ * Hex-encoded means we don't convert. data.initial should be long enough to
+ * hold the encoded string.
*/
-
- do {
- if( (n = iconv( cd, &in, &inbytesleft, &out, &outbytesleft)) == noconv ) {
- if( fromcode == codeset.source_encodings[0].name ) {
- codeset.source_encoding = &codeset.source_encodings[1];
- fromcode = codeset.source_encoding->name;
- tocodes.clear();
- cd = tocodes[toname] = iconv_open(tocode, fromcode);
- dbgmsg("%s: trying input encoding %s", __func__, fromcode);
- if( fromcode == tocode ) return data.initial; // no conversion required.
- n = noconv - 1; // try again
- }
+ if( has_attr(hex_encoded_e) ) {
+ if( data.capacity() < srclen ) {
+ const char *inbuf = data.original() + data.capacity();
+ error_msg( loc, "VALUE %qs is too long to initialize %qs, "
+ "discarded %ld bytes at %qs",
+ data.original(), name, long(srclen - data.capacity()), inbuf);
+ srclen = data.capacity();
}
- if( n == 0 ) break;
- } while( n != noconv );
-
- if( n == noconv ) {
- size_t i = in_len - inbytesleft;
- yyerror("failed to encode %s %qs as %s (%zu of %u bytes left)",
- fromcode, data.initial + i, tocode, inbytesleft, in_len);
- if( false ) return NULL;
- return data.initial;
- }
-
- if( 0 < inbytesleft ) {
- // data.capacity + inbytesleft is not correct if the remaining portion has
- // multibyte characters. But the fact remains that the VALUE is too big.
- ERROR_FIELD(this, "%s %s VALUE '%s' requires %zu bytes for size %u",
- cbl_field_t::level_str(level), name, data.initial,
- data.capacity + inbytesleft, data.capacity );
- }
-
- // Replace data.initial only if iconv output differs.
- if( 0 != memcmp(data.initial, output, out - output) ) {
- assert(out <= output + data.capacity || type == FldNumericEdited);
- dbgmsg("%s: converted '%.*s' to %s",
- __func__, data.capacity, data.initial, tocode);
- struct localspace_t {
- char space[4];
- size_t len, erc;
- explicit localspace_t( iconv_t cd ) {
- static char input[1] = { 0x20 };
- size_t inbytesleft2 = sizeof(input), outbytesleft2 = sizeof(space);
- char *in2 = input, *out2 = space;
-
- erc = iconv(cd, &in2, &inbytesleft2, &out2, &outbytesleft2);
- len = out2 - space;
+ std::copy(data.original(), data.original() + srclen,
+ const_cast<char*>(data.initial));
+ return nullptr; // may return "truncated success" with error.
+ }
+
+ auto figconst = cbl_figconst_of(data.original());
+ if( normal_value_e != figconst ) {
+ blank_initial( char_capacity(), figconst );
+ return nullptr;
+ }
+
+ for( auto src = codeset.default_encodings.current_source_encoding();
+ src;
+ src = codeset.default_encodings.next_source_encoding() ) {
+ /*
+ * Get the iconv handle to convert the source-code encoding to the field's
+ * encoding. If no such handle exists because iconv(3) can't do it
+ * (weird), try the next potential source-code encoding, which probably
+ * won't work either.
+ */
+ iconv_t cd = iconv_cd(codeset.encoding);
+ if( cd == iconv_t(-1) ) {
+ error_msg(loc, "cannot convert from %qs to %qs: %s",
+ cbl_encoding_str(src->type),
+ cbl_encoding_str(codeset.encoding),
+ xstrerror(errno));
+ continue;
+ }
+
+ /*
+ * If conversion succeeds, return NULL.
+ * If it fails, try the next potential encoding.
+ */
+ size_t inbytesleft = srclen, outbytesleft = data.capacity();
+ char *inbuf = const_cast<char*>(data.original());
+ char *outbuf = const_cast<char*>(data.initial);
+
+ size_t erc = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+
+ if( erc == size_t(-1) ) {
+ if( outbytesleft == 0 ) { // input doesn't fit
+ gcc_assert(0 < inbytesleft);
+ gcc_assert(0 < level);
+ if( loc.first_line == 0 )
+ loc = symbol_field_location(field_index(this));
+ if( type == FldNumericEdited ) {
+ // Tolerate trailing zeros for P-values
+ if( data.rdigits < 0 ) {
+ if( inbytesleft <= size_t(data.rdigits * -1) ) {
+ bool all_zeros = std::all_of(reinterpret_cast<const char*>(inbuf),
+ data.original() + srclen,
+ [](char ch) {
+ return '0' == ch;
+ });
+ if( all_zeros ) return nullptr;
+ }
+ }
+ }
+ error_msg( loc,
+ "VALUE %qs is too long to initialize %qs, discarded %qs",
+ data.original(), name, inbuf);
+ return nullptr; // success-ish
}
- bool valid() const { return 0 < len && erc != size_t(-1); }
- } spc(cd);
-
- if( ! spc.valid() ) {
- dbgmsg("%s:%d: iconv failed for %s: %s", __func__, __LINE__,
- tocode, xstrerror(errno));
- ERROR_FIELD(this, "iconv failed: %s", xstrerror(errno));
- return data.initial;
+ dbgmsg("'%c' of '%s'[%lu] could not be converted from %s to %s: %s",
+ *inbuf, data.original(), inbuf - data.original(),
+ cbl_encoding_str(
+ codeset.default_encodings.current_source_encoding()->type),
+ cbl_encoding_str(codeset.encoding),
+ xstrerror(errno) );
+ bad_boy = inbuf;
+ continue;
}
- assert( 0 < spc.len && spc.valid() );
- if( is_literal(this) ) {
- data.capacity = out - output; // trailing '!' will be overwritten
- }
- // Pad with trailing blanks, tacking a '!' on the end.
- for( const char *eout = output + data.capacity;
- out < eout;
- out += spc.len ) {
- memcpy(out, spc.space, spc.len);
+ if( inbytesleft == 0 ) {
+ if( data.all() ) {
+ for( size_t len = outbuf - data.initial;
+ outbuf + len <= data.initial + data.capacity();
+ outbuf += len ) {
+ std::copy( data.initial, data.initial + len, outbuf );
+ }
+ }
+ if( is_literal(this) ) {
+ data.capacity( outbuf - const_cast<char*>(data.initial) );
+ }
+ return nullptr; // success
}
- // Numeric literal strings may have leading zeros, making their length
- // longer than their capacity.
- out[0] = type == FldLiteralN? '\0' : '!';
- assert(out[1] == '\0');
- data.orig = data.initial;
- data.initial = output;
- } else {
- free(output);
+ // else try again
}
+ if( 0 == loc.first_line )
+ loc = level == 0 ? yylloc : symbol_field_location(field_index(this));
+ error_msg( loc, "%<%c%> of %qs could not be converted from %s to %s: %s",
+ *bad_boy, data.original(),
+ cbl_encoding_str(
+ codeset.default_encodings.current_source_encoding()->type),
+ cbl_encoding_str(codeset.encoding),
+ xstrerror(errno) );
+ return data.original();
+}
+
+void
+cbl_field_t::set_capacity(size_t nchar) {
+ switch(this->type) {
+ case FldGroup:
+ case FldAlphanumeric:
+ case FldNumericDisplay:
+ case FldNumericEdited:
+ case FldAlphaEdited:
+ case FldLiteralA:
+ case FldInvalid:
+ if( codeset.valid() ) {
+ if( attr & hex_encoded_e ) {
+ data.capacity( capacity_cast(nchar) );
+ } else {
+ data.capacity( capacity_cast(nchar) * codeset.stride() );
+ }
+
+ } else {
+ cbl_internal_error("%s: %s %s has invalid encoding",
+ __func__, cbl_field_type_str(type), name);
+ }
+ break;
+ case FldNumericBinary:
+ case FldFloat:
+ case FldPacked:
+ case FldNumericBin5:
+ case FldLiteralN:
+ case FldClass:
+ case FldConditional:
+ case FldForward:
+ case FldIndex:
+ case FldSwitch:
+ case FldDisplay:
+ case FldPointer:
+ data.capacity( capacity_cast(nchar) );
+ break;
+ }
+}
- return data.initial;
+void
+cbl_field_t::add_capacity(size_t nchar) {
+ data.add_capacity( nchar * codeset.stride() );
+}
+
+uint32_t
+cbl_field_t::char_capacity() const {
+ return data.capacity() / codeset.stride();
}
const char *
/* cppcheck warns that the following statement depends on the order of
evaluation of side effects. Since this isn't my code, and since I don't
think the warning can be eliminated without rewriting it, I am just
- supprressing it.
+ suppressing it.
-- Bob Dubner, 2025-07-14 */
// cppcheck-suppress unknownEvaluationOrder
bool has_section = std::any_of( ++eval, symbols_end(),
[program = eval->program]( const auto& sym ) {
if( program == sym.program && sym.type == SymLabel ) {
const auto& L(sym.elem.label);
- // true if the symbol is an explicit label.
+ // true if the symbol is an explicit label.
return L.type == LblSection && L.name[0] != '_';
}
return false;
uint32_t cbl_file_key_t::
size() {
if( leftmost != 0 ) {
- return cbl_field_of(symbol_at(leftmost))->data.capacity;
+ return cbl_field_of(symbol_at(leftmost))->data.capacity();
}
return std::accumulate(fields, fields + nfield, 0, key_field_size);
}
dbgmsg( "%s:%d: invalid symbol_type_t %d", __func__, __LINE__, type );
return false;
}
+
+bool
+validate_numeric_edited(cbl_field_t *field)
+ {
+ // returns TRUE when data.initial is compatible with PICTURE
+ bool retval = true;
+ if( field->type == FldNumericEdited
+ && field->data.original()
+ && !(field->attr & quoted_e)
+ && !(field->attr & FIGCONST_MASK) )
+ {
+ char *expanded = expand_picture(field->data.picture);
+
+ unsigned int decimal_point_local = __gg__decimal_point;
+ const char *pleft = expanded;
+ const char *pright = pleft + strlen(pleft);
+ const char *pmiddle = strchr(pleft, decimal_point_local);
+ if( !pmiddle )
+ {
+ pmiddle = pright;
+ }
+
+ // Count up digit placeholders to the left of the the decimal point:
+ int currencies_local = 0;
+ int signs = 0;
+ int pldigits = 0;
+ while( pleft < pmiddle )
+ {
+ unsigned int ch = (unsigned char)*pleft++;
+ if( symbol_currency(ch) )
+ {
+ // The very first currency symbol is not a digit placeholder:
+ pldigits += currencies_local;
+ currencies_local = 1;
+ }
+ else if( ch == (unsigned char)ascii_plus
+ || ch == (unsigned char)ascii_minus )
+ {
+ // The very first sign is not a digit placeholder
+ pldigits += signs;
+ signs = 1;
+ }
+ else if( ch == (unsigned char)ascii_Z
+ || ch == (unsigned char)ascii_z
+ || ch == (unsigned char)ascii_P
+ || ch == (unsigned char)ascii_p
+ || ch == (unsigned char)ascii_9
+ || ch == (unsigned char)ascii_asterisk )
+ {
+ // 9 Z and * are digit placeholders:
+ pldigits += 1;
+ }
+ }
+
+ // Count up digit placeholders to the left of the the decimal point:
+ int prdigits = 0;
+ while( pmiddle < pright )
+ {
+ unsigned int ch = (unsigned char)*pmiddle++;
+ if( ch == (unsigned char)ascii_Z
+ || ch == (unsigned char)ascii_z
+ || ch == (unsigned char)ascii_P
+ || ch == (unsigned char)ascii_p
+ || ch == (unsigned char)ascii_9
+ || ch == (unsigned char)ascii_asterisk )
+ {
+ // 9 Z and * are digit placeholders:
+ prdigits += 1;
+ }
+ }
+
+ // We have established the number of left and right digit placeholders.
+ // We now need to do the same for the number that has to go into those
+ // placeholders.
+
+ const char *dleft = field->data.original();
+ const char *dright = dleft + strlen(dleft);
+ const char *dmiddle = strchr(dleft, decimal_point);
+ if( !dmiddle )
+ {
+ dmiddle = dright;
+ }
+
+ // Skip over leading zeros
+ int dldigits = 0;
+ int nonzero = 0;
+ while(dleft < dmiddle)
+ {
+ unsigned int ch = (unsigned char)*dleft++;
+ if( ch == (unsigned char)ascii_0 )
+ {
+ // We are looking at a zero. Ignore leading zeroes if we haven't
+ // already seen a digit
+ dldigits += nonzero;
+ }
+ else if(ch >= (unsigned char)ascii_1 && ch <= (unsigned char)ascii_9 )
+ {
+ nonzero = 1;
+ dldigits += nonzero;
+ }
+ }
+
+ // Now count up the digits to the right of the decimal point:
+ int drdigits = 0;
+ // Adjust dright to skip trailing spaces
+ while( dright > dmiddle )
+ {
+ if( (unsigned char)*(dright-1) != (unsigned char)ascii_space )
+ break;
+ dright -= 1;
+ }
+ // Adjust dright to skip trailing zeroes
+ while( dright > dmiddle )
+ {
+ if( (unsigned char)*(dright-1) != (unsigned char)ascii_0 )
+ break;
+ dright -= 1;
+ }
+ // And count up the remaining characters:
+ while( dmiddle < dright )
+ {
+ unsigned int ch = (unsigned char)*dmiddle++;
+ if(ch >= (unsigned char)ascii_0 && ch <= (unsigned char)ascii_9 )
+ {
+ drdigits += 1;
+ }
+ }
+
+ // After all that, the acceptance test is disturbingly simple:
+ if( dldigits > pldigits || drdigits > prdigits )
+ {
+ retval = false;
+ }
+ free(expanded);
+ }
+ return retval;
+ }
(cbl_gcobol_features & feature_embiggen_e);
}
+bool cobol_alpha_encoding( const char name[] );
+bool cobol_national_encoding( const char name[] );
+
enum cbl_division_t {
identification_div_e,
environment_div_e,
enum cbl_figconst_t cbl_figconst_of( const char *value );
const char * cbl_figconst_str( cbl_figconst_t fig );
-const char * consistent_encoding_check( const YYLTYPE& loc, const char input[] );
-
class cbl_domain_elem_t {
uint32_t length;
const char *value;
cbl_domain_elem_t()
: length(0), value(NULL), is_numeric(false), all(false)
{}
- cbl_domain_elem_t( const YYLTYPE& loc,
- bool all,
+ cbl_domain_elem_t( bool all,
uint32_t length,
const char *value,
bool is_numeric = false )
: length(length), value(value), is_numeric(is_numeric), all(all)
{
if( value && ! is_numeric ) {
- auto s = consistent_encoding_check(loc, value);
- if( s ) this->value = s;
+ this->value = value;
}
}
const char *name() const { return value; }
cbl_domain_elem_t first, last;
cbl_domain_t() : first(), last(first)
{}
- cbl_domain_t( const YYLTYPE& loc,
- bool all,
+ cbl_domain_t( bool all,
uint32_t length,
const char * value,
bool is_numeric = false )
- : first(loc, all, length, value, is_numeric), last(first)
+ : first(all, length, value, is_numeric), last(first)
{}
cbl_domain_t( const cbl_domain_elem_t& a, const cbl_domain_elem_t& z )
: first(a)
// the same.
#define MAXIMUM_ALPHA_LENGTH 8192
-struct cbl_field_data_t {
+class cbl_field_data_t {
+ uint32_t nbyte; // allocated space
+ struct orig_t {
+ bool all;
+ const char *data;
+ REAL_VALUE_TYPE value;
+ orig_t() : all(false), data(nullptr), value{} {}
+ explicit orig_t( const char *data, bool all = false )
+ : all(all), data(data), value{}
+ {}
+ explicit orig_t( REAL_VALUE_TYPE value )
+ : all(false), data(nullptr), value(value)
+ {}
+ } orig;
+public:
uint32_t memsize; // nonzero if larger subsequent redefining field
- uint32_t capacity, // allocated space
- digits; // magnitude: total digits (or characters)
+ uint32_t digits; // magnitude: total digits (or characters)
int32_t rdigits; // digits to the right
- const char *orig, *initial, *picture;
-
- enum etc_type_t { val88_e, upsi_e, value_e } etc_type;
+ const char *initial, *picture;
+ enum etc_type_t { no_value_e, val88_e, upsi_e, value_e } etc_type;
const char *
etc_type_str() const {
switch(etc_type) {
case val88_e: return "val88_e";
case upsi_e: return "upsi_e";
- case value_e: return "value_e";
+ case no_value_e: return "no value";
+ case value_e: return "value_e";
}
return "???";
}
-
+ bool etc_ok() const { return etc_type != no_value_e; }
+
union etc_t {
// "Domain" is an array representing the VALUE of CLASS or 88 type.
struct val88_t {
} etc;
cbl_field_data_t()
- : memsize(0)
- , capacity(0)
+ : nbyte(0)
+ , memsize(0)
, digits(0)
, rdigits(0)
- , orig(0)
, initial(0)
, picture(0)
- , etc_type(value_e)
+ , etc_type(no_value_e)
, etc()
{}
- cbl_field_data_t( uint32_t memsize, uint32_t capacity )
- : memsize(memsize)
- , capacity(capacity)
+ cbl_field_data_t( uint32_t memsize, uint32_t nbyte )
+ : nbyte(nbyte)
+ , memsize(memsize)
, digits(0)
, rdigits(0)
- , orig(0)
, initial(0)
, picture(0)
- , etc_type(value_e)
+ , etc_type(no_value_e)
, etc()
{}
- cbl_field_data_t( uint32_t memsize, uint32_t capacity,
+ cbl_field_data_t( uint32_t memsize, uint32_t nbyte,
uint32_t digits, uint32_t rdigits,
const char *initial,
const char *picture = NULL )
- : memsize(memsize)
- , capacity(capacity)
+ : nbyte(nbyte)
+ , orig(initial)
+ , memsize(memsize)
, digits(digits)
, rdigits(rdigits)
- , orig(0)
- , initial(initial)
+ , initial(initial) // initial == data.orig.data
, picture(picture)
- , etc_type(value_e)
+ , etc_type(no_value_e)
, etc()
{}
+ inline uint32_t capacity( uint32_t size ) { return nbyte = size; }
+ inline uint32_t capacity() const { return nbyte; }
+
+ inline uint32_t add_capacity( uint32_t size ) { return nbyte += size; }
+
cbl_field_data_t( const cbl_field_data_t& that ) {
copy_self(that);
}
return etc.value = build_int_cst_type(integer_type_node, i);
}
+ tree_code value_type() const {
+ gcc_assert(etc_type == value_e);
+ tree_node *node = TREE_TYPE(etc.value);
+ tree_code code = TREE_CODE(node);
+ return code;
+ }
+ bool value_is_float() const {
+ gcc_assert(etc_type == value_e);
+ tree_node *node = TREE_TYPE(etc.value);
+ return SCALAR_FLOAT_TYPE_P(node);
+ }
+ bool value_is_fixed() const {
+ gcc_assert(etc_type == value_e);
+ tree_node *node = TREE_TYPE(etc.value);
+ return FIXED_POINT_TYPE_P(node);
+ }
+ bool value_is_integer() const {
+ gcc_assert(etc_type == value_e);
+ tree_node *node = TREE_TYPE(etc.value);
+ return INTEGRAL_TYPE_P(node);
+ }
+
+ // verify is numeric and zero fraction
+ std::pair<int64_t, bool> int64_of() const {
+ if( etc_type == value_e ) {
+ auto r = TREE_REAL_CST_PTR( value_of() );
+ auto n = real_to_integer(r);
+ REAL_VALUE_TYPE r2;
+ real_from_integer (&r2, VOIDmode, n, SIGNED);
+ // If the orginal value r is equal to r2, derived from its integer
+ // part n, then the fractional component is zero.
+ if( real_identical (r, &r2) ) {
+ return std::make_pair( int64_t(n), true );
+ }
+ }
+ return std::make_pair(int64_t(0), false);
+ }
+
+ bool has_initial_value() const {
+ return orig.data || etc_type != no_value_e;
+ }
+
void set_real_from_capacity( REAL_VALUE_TYPE *r ) const {
- real_from_integer (r, VOIDmode, capacity, SIGNED);
+ real_from_integer (r, VOIDmode, nbyte, SIGNED);
}
time_now_f time_func;
int32_t ldigits() const { return std::max(int(digits), int(digits - rdigits)); }
cbl_field_data_t& valify() {
- assert(initial);
- std::string input(initial);
+ assert(orig.data);
+ std::string input(orig.data);
if( decimal_is_comma() ) {
std::replace(input.begin(), input.end(), ',', '.');
}
}
cbl_field_data_t& valify( const char *input ) {
assert(input);
- initial = input;
- capacity = strlen(initial);
+ original(input);
return valify();
}
- // If initial (of Numeric Edited) has any length but capacity, adjust it.
- bool manhandle_initial() {
- assert(capacity > 0);
- assert(initial != nullptr);
- if( capacity < strlen(initial) ) {
- char *p = const_cast<char*>(initial);
- p[capacity] = '\0';
- return true;
- }
- if( strlen(initial) < capacity ) {
- auto tgt = reinterpret_cast<char *>( xmalloc(capacity + 1) );
- auto pend = tgt + capacity;
- auto p = std::copy(initial, initial + strlen(initial), tgt);
- std::fill(p, pend, 0x20);
- p = pend - 1;
- *p = '\0';
- initial = tgt;
- }
- return false;
+ bool all() const { return orig.data? orig.all : false; }
+ bool is_alpha_edited() const;
+ const char *original() const { return orig.data? orig.data : nullptr; }
+ const REAL_VALUE_TYPE& original_numeric() const { return orig.value; }
+
+ // Set the original string, and set the capacity to its length if nothing
+ // else already did. This function is used only for VALUE numeric literal,
+ // to preserve the VALUE clause until the field is fully defined.
+ const char *original( const char *orig, bool all = false) {
+ if( nbyte == 0 ) nbyte = strlen(orig);
+ this->orig = orig_t( orig, all );
+ return this->orig.data;
}
- bool initial_within_capacity() const {
- return initial[capacity] == '\0'
- || initial[capacity] == '!';
+ // Set the computed cce value. Do not impute capacity.
+ void original( REAL_VALUE_TYPE value ) {
+ orig = orig_t(value);
}
- const char *original() const { return orig? orig : initial; }
-
protected:
cbl_field_data_t& copy_self( const cbl_field_data_t& that ) {
memsize = that.memsize;
- capacity = that.capacity;
+ nbyte = that.nbyte;
digits = that.digits;
rdigits = that.rdigits;
+ orig = that.orig;
initial = that.initial;
picture = that.picture;
etc_type = that.etc_type;
switch(etc_type) {
+ case no_value_e:
case value_e:
etc.value = that.etc.value;
break;
size_t offset, isym;
};
+const encodings_t *
+ __gg__encoding_iconv_descr( cbl_encoding_t encoding );
+const encodings_t *
+ __gg__encoding_iconv_descr( const char name[] );
const char * __gg__encoding_iconv_name( cbl_encoding_t encoding );
bool __gg__encoding_iconv_valid( cbl_encoding_t encoding );
uint32_t level;
cbl_occurs_t occurs;
struct codeset_t {
- static const encodings_t standard_internal, source_encodings[2], *source_encoding;
+ struct default_encodings_t {
+ friend bool cobol_alpha_encoding( const char name[] );
+ friend bool cobol_national_encoding( const char name[] );
+ encodings_t alpha, national;
+ const encodings_t possible_sources[2] = {}, *source = 0;
+
+ default_encodings_t( const encodings_t& alpha,
+ const std::vector<encodings_t>& possible_sources )
+ : alpha(alpha)
+ , national(alpha)
+ {
+ std::copy(possible_sources.begin(),
+ possible_sources.end(),
+ const_cast<encodings_t*>(this->possible_sources));
+ source = this->possible_sources;
+ }
+ const encodings_t *next_source_encoding() {
+ if( ++source < possible_sources + COUNT_OF(possible_sources) ) {
+ return source;
+ }
+ return nullptr;
+ }
+ const encodings_t *current_source_encoding() const {
+ return
+ source < possible_sources + COUNT_OF(possible_sources) ?
+ source : nullptr;
+ }
+
+ };
+ static default_encodings_t default_encodings;
+
cbl_encoding_t encoding;
size_t alphabet; // unlikely
explicit codeset_t(cbl_encoding_t encoding = custom_encoding_e,
: encoding(encoding), alphabet(alphabet)
{}
bool valid() const {
- return
- (alphabet == 0 && encoding != custom_encoding_e)
- ||
- (alphabet != 0 && encoding == custom_encoding_e);
+ switch(encoding) {
+ case no_encoding_e:
+ return false;
+ case custom_encoding_e:
+ return alphabet != 0;
+ default:
+ break;
+ }
+ return alphabet == 0;
}
bool consistent() const {
return valid() && ( encoding == current_encoding('A')
||
encoding == UTF8_e );
}
+ // set_explicit overrides an encoding inferred via e.g. PIC 999.
+ bool set_explicit( cbl_encoding_t encoding ) {
+ assert(valid_encoding(encoding));
+ this->encoding = encoding;
+ this->alphabet = 0;
+ return valid();
+ }
+ bool set_per_source() {
+ return set_explicit( default_encodings.source->type );
+ }
bool set( cbl_encoding_t encoding, size_t alphabet = 0 ) {
assert(valid_encoding(encoding));
if( ! valid() ) { // setting first time
- this->encoding = encoding;
- this->alphabet = alphabet;
- return valid();
+ return set_explicit(encoding);
}
return this->encoding == encoding && this->alphabet == alphabet;
}
cbl_encoding_t set() const {
return valid()? encoding : cbl_encoding_t(-1);
}
- const char *name() const {
+ uint8_t stride() const;
+ inline const char *name() const {
return valid()? __gg__encoding_iconv_name(encoding) : "nocoding";
}
} codeset;
strcpy(this->name, name);
}
+ cbl_field_t( cbl_field_type_t type, uint64_t attr,
+ const cbl_field_data_t& data,
+ uint32_t level, const cbl_name_t name,
+ const cbl_field_t::codeset_t& codeset )
+ : offset(0), type(type), usage(FldInvalid), attr(attr)
+ , parent(0), our_index(0), level(level), codeset(codeset)
+ , line(0), name(""), file(0), data(data)
+ , var_decl_node(nullptr), data_decl_node(nullptr)
+ {
+ gcc_assert(strlen(name) < sizeof this->name);
+ strcpy(this->name, name);
+ }
+
cbl_field_t( cbl_field_type_t type, uint32_t level, int line, uint64_t attr = 0 )
: offset(0), type(type), usage(FldInvalid), attr(attr)
, parent(0), our_index(0), level(level)
}
bool is_valid() const {
- return data.capacity > 0
+ return data.capacity() > 0
|| level == 88
|| level == 66
|| type == FldClass
}
bool reasonable_capacity() const {
- return data.capacity <= MAX_FIXED_POINT_DIGITS;
+ return data.capacity() <= MAX_FIXED_POINT_DIGITS * codeset.stride();
}
cbl_field_t& same_as( const cbl_field_t& that, bool is_typedef ) {
return *this;
}
- void report_invalid_initial_value(const YYLTYPE& loc) const;
+ bool report_invalid_initial_value(const YYLTYPE& loc) const;
bool is_ascii() const;
bool is_integer() const { return is_numeric(type) && data.rdigits == 0; }
return type == FldNumericBinary || type == FldNumericBin5;
}
+ bool is_numeric_constant() const {
+ return type == FldLiteralN
+ || (type == FldFloat && has_attr(constant_e));
+ }
+
HOST_WIDE_INT as_integer() const {
return real_to_integer( TREE_REAL_CST_PTR (data.value_of()) );
}
type = FldNumericBin5;
attr |= embiggened_e;
- data.capacity = eight;
+ data.capacity(eight);
data.digits = 0;
}
bool has_subordinate( const cbl_field_t *that ) const;
- const char * internalize();
+ uint32_t char_capacity() const;
+ void set_capacity(size_t cap);
+ void add_capacity(size_t cap);
+ void set_initial( const cbl_loc_t& loc );
+ void set_initial( size_t nchar, const cbl_loc_t& loc = cbl_loc_t() );
+ size_t source_code_check(const void *initial, size_t length);
+ const char * encode( size_t, cbl_loc_t loc = cbl_loc_t());
+ void encode_numeric( const char input[], cbl_loc_t loc,
+ const REAL_VALUE_TYPE& rvt = {});
const char *value_str() const;
bool is_key_name() const { return has_attr(record_key_e); }
return data.digits?
long(data.digits) - data.rdigits
:
- data.capacity;
+ data.capacity();
}
uint32_t size() const; // table capacity or capacity
inline const char * level_str() const {
return level_str(level);
}
+ void blank_initial( size_t len, cbl_figconst_t figconst = normal_value_e );
};
const cbl_field_t * cbl_figconst_field_of( const char *value );
size_t field_index( const cbl_field_t *f );
+cbl_field_t * new_literal_float( const cbl_loc_t& loc, const char initial[] );
+
cbl_field_t * new_temporary( enum cbl_field_type_t type,
const char initial[] = NULL,
- bool attr = false );
+ cbl_field_attr_t = none_e );
cbl_field_t * new_temporary_like( cbl_field_t skel );
cbl_field_t * new_temporary_clone( const cbl_field_t *orig);
cbl_field_t * keep_temporary( cbl_field_type_t type );
+cbl_field_t * new_literal_2( uint32_t len, const char initial[],
+ cbl_field_attr_t attr,
+ cbl_encoding_t encoding = ASCII_e );
+
cbl_field_t * new_literal( uint32_t len, const char initial[],
cbl_field_attr_t attr,
cbl_encoding_t encoding = ASCII_e );
}
void symbol_temporaries_free();
+void symbol_temporary_location( const cbl_field_t *field,
+ const cbl_loc_t& loc);
+cbl_loc_t symbol_temporary_location( const cbl_field_t *field );
class temporaries_t {
- friend void symbol_temporaries_free();
- struct literal_an {
+ friend void symbol_temporaries_free();
+ friend void symbol_temporary_location( const cbl_field_t *field,
+ const cbl_loc_t& loc);
+ friend cbl_loc_t symbol_temporary_location( const cbl_field_t *field );
+
+ class literal_an {
bool is_quoted, is_verbatim; // verbatim: don't use codeset
+ public:
std::string value;
literal_an() : is_quoted(false), is_verbatim(false), value("???") {}
literal_an( const char value[], bool is_quoted, bool is_verbatim = false )
: is_quoted(is_quoted), is_verbatim(is_verbatim), value(value) {}
+ literal_an( size_t len, const char value[] )
+ : is_quoted(true), is_verbatim(false), value(value, len)
+ {
+ gcc_assert(0 < len);
+ gcc_assert(value[len-1] == '\0');
+ gcc_assert(this->value.back() == '\0');
+ }
literal_an( const literal_an& that )
- : is_quoted(that.is_quoted),
- is_verbatim(that.is_verbatim),
- value(that.value)
+ : is_quoted(that.is_quoted)
+ , is_verbatim(that.is_verbatim)
+ , value(that.value)
{}
literal_an& operator=( const literal_an& that ) {
is_quoted = that.is_quoted;
}
bool operator<( const literal_an& that ) const {
if( value == that.value ) { // alpha before numeric
- if( is_quoted == that.is_quoted ) { // verbatim before not
- return (is_verbatim? 0 : 1) < (that.is_verbatim? 0 : 1);
+ if( is_quoted == that.is_quoted ) { // non-verbatim first
+ return that.is_verbatim;
}
- return (is_quoted? 0 : 1) < (that.is_quoted? 0 : 1);
+ return that.is_quoted; // unquoted first
}
return value < that.value;
}
+ bool terminated() const {
+ // Z strings include the NUL terminator.
+ return !is_verbatim && is_quoted && !value.empty() && '\0' == value.back();
+ }
};
std::map<literal_an, cbl_field_t *> literals;
+ std::map<const cbl_field_t*, cbl_loc_t> locs;
typedef std::set<cbl_field_t *> fieldset_t;
typedef std::map<cbl_field_type_t, fieldset_t> fieldmap_t;
fieldmap_t used, freed;
}
};
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winvalid-offsetof"
# define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
static inline symbol_elem_t *
// cppcheck-suppress cstyleCast
reinterpret_cast<const symbol_elem_t *>((const char*)field - n);
}
+#pragma GCC diagnostic pop
symbol_elem_t * symbols_begin( size_t first = 0 );
symbol_elem_t * symbols_end(void);
size_t symbol_index(); // nth after first program symbol
size_t symbol_index( const symbol_elem_t *e );
-size_t symbol_unique_index( const struct symbol_elem_t *e );
+uint64_t symbol_unique_index( const struct symbol_elem_t *e );
struct symbol_elem_t * symbol_at( size_t index );
const char * cbl_field_type_str( enum cbl_field_type_t type );
const char * cbl_logop_str( enum logop_t op );
+const char * cbl_field_type_name( enum cbl_field_type_t type ); // for messages
+
static inline const char *
refer_type_str( const cbl_refer_t *r ) {
return r && r->field? cbl_field_type_str(r->field->type) : "(none)";
void current_enabled_ecs( tree ena );
+bool validate_numeric_edited(cbl_field_t *field);
+
#endif
// generated by /home/jklowden/projects/3rd/gcc/parser/gcc/cobol/token_names.h.gen cobol/parse.h
-// Wed Nov 26 11:57:23 EST 2025
+// Sun Jan 11 18:01:04 EST 2026
tokens = {
{ "identification", IDENTIFICATION_DIV }, // 258
{ "environment", ENVIRONMENT_DIV }, // 259
* header files.
*/
-#include <cobol-system.h>
+#include "cobol-system.h"
#include <coretypes.h>
#include <tree.h>
+#include <fold-const.h>
#undef yy_flex_debug
#include <langinfo.h>
#include <backtrace.h>
#include <diagnostic.h>
#include <opts.h>
-#include "util.h"
+#include "util.h"
#include "cbldiag.h"
#include "cdfval.h"
#include "lexio.h"
#include "genapi.h"
#include "genutil.h"
#include "../../libgcobol/charmaps.h"
+#include "../../libgcobol/valconv.h"
#pragma GCC diagnostic ignored "-Wunused-result"
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
if( value.is_numeric() ) {
auto initial = xasprintf("%ld", (long)value.as_number());
auto len = strlen(initial);
- cbl_field_data_t data(len, len);
- data.initial = initial;
+ cbl_field_data_t data(len, len, len,0, initial); // digits == len, no rdigits
data.valify();
field = cbl_field_t{ FldLiteralN, constant_e, data, 0, name.c_str()};
} else {
auto len = strlen(value.string);
cbl_field_data_t data(len, len);
- data.initial = xstrdup(value.string);
+ data.original(xstrdup(value.string));
field = cbl_field_t{ FldLiteralA, constant_e, data, 0, name.c_str() };
field.set_attr(quoted_e);
}
- field.codeset.set();
+ field.codeset.set();
return field;
}
-const std::list<cbl_field_t>
+const std::list<cbl_field_t>
cdf_literalize() {
std::list<cbl_field_t> fields;
auto dict = cdf_dictionary();
-
+
for( auto elem : dict ) {
std::string name(elem.first);
const cdfval_t& value(elem.second);
-
- fields.push_back(cdf_literalize(name, value));
+
+ fields.push_back(cdf_literalize(name, value));
}
return fields;
}
return "???";
}
+const char *
+cbl_field_type_name( enum cbl_field_type_t type )
+{
+ switch(type) {
+ case FldDisplay:
+ return "DISPLAY";
+ case FldInvalid:
+ return ""; // Invalid";
+ case FldGroup:
+ return "GROUP";
+ case FldAlphanumeric:
+ return "ALPHANUMERIC";
+ case FldNumericBinary:
+ return "NUMERIC-BINARY";
+ case FldFloat:
+ return "FLOAT";
+ case FldNumericBin5:
+ return "COMPUTATIONAL-5";
+ case FldPacked:
+ return "PACKED-DECIMAL";
+ case FldNumericDisplay:
+ return "NUMERIC-DISPLAY";
+ case FldNumericEdited:
+ return "NUMERIC-EDITED";
+ case FldAlphaEdited:
+ return "ALPHANUMERIC-EDITED";
+ case FldLiteralA:
+ return "ALPHANUMERIC LITERAL";
+ case FldLiteralN:
+ return "NUMERIC LITERAL";
+ case FldClass:
+ return "CLASS";
+ case FldConditional:
+ return "CONDITIONAL";
+ case FldForward:
+ return "FORWARD";
+ case FldIndex:
+ return "INDEX";
+ case FldSwitch:
+ return "SWITCH";
+ case FldPointer:
+ return "POINTER";
+ }
+ cbl_internal_error("%s:%d: invalid %<symbol_type_t%> %d", __func__, __LINE__, type);
+ return "???";
+}
+
const char *
cbl_logop_str( enum logop_t op )
{
if( aref.field->type == FldFloat || bref.field->type == FldFloat )
{
output.type = FldFloat;
- output.data.capacity = 16;
+ output.data.capacity(16);
output.attr = (intermediate_e );
}
else if( op == '*'
> MAX_FIXED_POINT_DIGITS)
{
output.type = FldFloat;
- output.data.capacity = 16;
+ output.data.capacity(16);
output.attr = (intermediate_e );
}
else
{
output.type = FldNumericBin5;
- output.data.capacity = 16;
+ output.data.capacity(16);
output.data.digits = MAX_FIXED_POINT_DIGITS;
output.attr = (intermediate_e | signable_e );
}
extern int yydebug, yy_flex_debug;
bool
-is_alpha_edited( const char picture[] ) {
+cbl_field_data_t::is_alpha_edited() const {
static const char valid[] = "abxABX90/(),.";
assert(picture);
-
- for( const char *p = picture; *p != '\0'; p++ ) {
- if( strchr(valid, *p) ) continue;
- if( ISDIGIT(*p) ) continue;
- if( symbol_decimal_point() == *p ) continue;
- if( symbol_currency(*p) ) continue;
-
- if( yydebug ) {
- dbgmsg( "%s: bad character '%c' at %.*s<-- in '%s'",
- __func__, *p, int(p - picture) + 1, picture, picture );
- }
+ auto ep = picture + strlen(picture);
+
+ // Find first character that is not part of an alpha-edited PICTURE.
+ auto p = std::find_if( picture, ep,
+ []( char ch ) {
+ if( strchr(valid, ch) ) return false;
+ if( ISDIGIT(ch) ) return false;
+ if( symbol_decimal_point() == ch ) return false;
+ if( symbol_currency(ch) ) return false;
+ return true;
+ } );
+ if( p != ep ) {
+ dbgmsg( "%s: bad character '%c' at %.*s<-- in '%s'",
+ __func__, *p, int(p - picture) + 1, picture, picture );
return false;
}
return true;
case FldPointer:
// set the type
field->type = candidate;
- if( field->data.capacity == 0 ) {
+ if( field->data.capacity() == 0 ) {
static const cbl_field_data_t data = {0, 8, 0, 0, NULL};
field->data = data;
field->attr &= ~size_t(signable_e);
case FldNumericDisplay:
case FldAlphaEdited:
case FldNumericEdited:
- {
- bool retval = field->codeset.set();
- return retval;
- }
+ return field->codeset.set();
default:
break;
}
field->data.initial = NULL;
}
- if( field->data.capacity == 0 ) field->data = primary->data;
+ if( field->data.capacity() == 0 ) field->data = primary->data;
if( is_numeric(field->type) && field->usage == FldDisplay ) {
fOK = symbol_field_type_update(field, FldNumericDisplay, false);
return fOK;
}
+static
+FIXED_WIDE_INT(128)
+dirty_to_binary(const char *instring,
+ uint32_t &capacity,
+ uint32_t &digits,
+ int32_t &rdigits,
+ uint64_t &attr)
+ {
+ digits = 0;
+ rdigits = 0;
+ attr = 0;
+
+ FIXED_WIDE_INT(128) value = 0;
+
+ // We need to convert data.initial to an FIXED_WIDE_INT(128) value
+ const char *p = instring;
+ int sign = 1;
+ bool ignore_zeroes = true;
+ if( *p == '-' )
+ {
+ attr |= signable_e;
+ sign = -1;
+ p += 1;
+ }
+ else if( *p == '+' )
+ {
+ // We set it signable so that the instruction DISPLAY +1
+ // actually outputs "+1"
+ attr |= signable_e;
+ p += 1;
+ }
+
+ // We need to be able to handle
+ // 123
+ // 123.456
+ // 123E<exp>
+ // 123.456E<exp>
+ // where <exp> can be N, +N and -N
+ //
+
+ int rdigit_delta = 0;
+ int exponent = 0;
+ const char *exp = strchr(p, 'E');
+ if( !exp )
+ {
+ exp = strchr(p, 'e');
+ }
+ if(exp)
+ {
+ exponent = atoi(exp+1);
+ }
+
+ // We can now calculate the value, and the number of digits and rdigits.
+
+ // We trim off leading zeroes before the decimal point, and trailing zeroes
+ // after a decimal point.
+
+ const char *pend = exp;
+ if( !exp )
+ {
+ pend = instring + strlen(instring);
+ }
+
+ const char *pdecimal = strchr(instring, symbol_decimal_point());
+ if( pdecimal )
+ {
+ while( pend > instring && *(pend-1) == '0' )
+ {
+ pend -= 1;
+ }
+ }
+
+ while(p < pend)
+ {
+ char ch = *p++;
+ if( ch == symbol_decimal_point() )
+ {
+ rdigit_delta = 1;
+ ignore_zeroes = false;
+ continue;
+ }
+ if( ignore_zeroes && ch == '0' )
+ {
+ continue;
+ }
+ ignore_zeroes = false;
+ if( ch < '0' || ch > '9' )
+ {
+ break;
+ }
+ digits += 1;
+ rdigits += rdigit_delta;
+ value *= 10;
+ value += ch - '0';
+ }
+
+ if( exponent < 0 )
+ {
+ rdigits += -exponent;
+ }
+ else
+ {
+ while(exponent--)
+ {
+ if(rdigits)
+ {
+ rdigits -= 1;
+ }
+ else
+ {
+ digits += 1;
+ value *= 10;
+ }
+ }
+ }
+
+ if( (int32_t)digits < rdigits )
+ {
+ digits = rdigits;
+ }
+
+ // We now need to calculate the capacity.
+
+ unsigned int min_prec = wi::min_precision(value, UNSIGNED);
+ if( min_prec > 64 )
+ {
+ // Bytes 15 through 8 are non-zero
+ capacity = 16;
+ }
+ else if( min_prec > 32 )
+ {
+ // Bytes 7 through 4 are non-zero
+ capacity = 8;
+ }
+ else if( min_prec > 16 )
+ {
+ // Bytes 3 and 2
+ capacity = 4;
+ }
+ else if( min_prec > 8 )
+ {
+ // Byte 1 is non-zero
+ capacity = 2;
+ }
+ else
+ {
+ // The value is zero through 0xFF
+ capacity = 1;
+ }
+
+ value *= sign;
+
+ // One last adjustment. The number is signable, so the binary value
+ // is going to be treated as twos complement. That means that the highest
+ // bit has to be 1 for negative signable numbers, and 0 for positive. If
+ // necessary, adjust capacity up by one byte so that the variable fits:
+
+ if( capacity < 16 && (attr & signable_e) )
+ {
+ FIXED_WIDE_INT(128) mask
+ = wi::set_bit_in_zero<FIXED_WIDE_INT(128)>(capacity * 8 - 1);
+ if( wi::neg_p (value) && (value & mask) == 0 )
+ {
+ capacity *= 2;
+ }
+ else if( !wi::neg_p (value) && (value & mask) != 0 )
+ {
+ capacity *= 2;
+ }
+ }
+
+ return value;
+ }
+
+static void
+digits_from_int128( char *ach,
+ cbl_field_t *field,
+ uint32_t desired_digits,
+ FIXED_WIDE_INT(128) value128, // cppcheck-suppress unknownMacro
+ int32_t rdigits)
+ {
+ if( value128 < 0 )
+ {
+ value128 = -value128;
+ }
+
+ // 'rdigits' are the number of rdigits in value128.
+
+ int scaled_rdigits = get_scaled_rdigits(field);
+
+ int i = field->data.rdigits;
+ while( i<0 )
+ {
+ value128 = value128/10;
+ i += 1;
+ }
+
+ // We take the digits of value128, and put them into ach. We line up
+ // the rdigits, and we truncate the string after desired_digits
+ while(rdigits < scaled_rdigits)
+ {
+ value128 *= 10;
+ rdigits += 1;
+ }
+ while(rdigits > scaled_rdigits)
+ {
+ value128 = value128 / 10;
+ rdigits -= 1;
+ }
+ char conv[128];
+ print_dec (value128, conv, SIGNED);
+ size_t len = strlen(conv);
+
+ if( len<desired_digits )
+ {
+ memset(ach, ascii_0, desired_digits - len);
+ strcpy(ach+desired_digits - len, conv);
+ }
+ else
+ {
+ strcpy(ach, conv + len-desired_digits);
+ }
+ }
+
+static
+void
+binary_initial( char *retval,
+ cbl_field_t *field,
+ FIXED_WIDE_INT(128) value,
+ int drdigits)
+ {
+ // This routine returns an xmalloced buffer designed to replace the
+ // data.initial member of the incoming field
+
+ int scaled_rdigits = get_scaled_rdigits(field);
+
+ int i = field->data.rdigits;
+ while( i<0 )
+ {
+ value = value/10;
+ i += 1;
+ }
+
+ // We take the digits of value, and put them into ach. We line up
+ // the rdigits, and we truncate the string after desired_digits
+ while(drdigits < scaled_rdigits)
+ {
+ value *= 10;
+ drdigits += 1;
+ }
+ while(drdigits > scaled_rdigits)
+ {
+ value = value / 10;
+ drdigits -= 1;
+ }
+
+ switch(field->data.capacity())
+ {
+ tree type;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ case 16:
+ type = build_nonstandard_integer_type ( field->data.capacity()
+ * BITS_PER_UNIT, 0);
+ native_encode_wide_int (type, value, PTRCAST(unsigned char, retval),
+ field->data.capacity());
+ break;
+ default:
+ fprintf(stderr,
+ "Trouble in binary_initial at %s() %s:%d\n",
+ __func__,
+ __FILE__,
+ __LINE__);
+ abort();
+ break;
+ }
+ }
+
+/*
+ * Preconditions:
+ * 1. input is not NULL
+ * 2. type is numeric
+ * 3. input conforms to type (will fit, allows sign, etc.)
+ * 4. capacity set per PICTURE and USAGE, or
+ * type == FldLiteralN and data.capacity == 0
+ * 5. data_initial has been established with data.capacity() bytes, unless
+ * FldLiteralN, in which case we will malloc data.initial.
+ *
+ * Process:
+ * Convert input string to binary Host representation:
+ * FldFloat: as tree
+ * other (fixed point): as FIXED_WIDE_INT(128)
+ * Set etc union via assignment, cbl_field_data_t::operator=().
+ * That sets the correct member in the union and etc_type.
+ *
+ * As of Mon Jan 5 13:32:32 2026, use gcc_assert for preconditions. A location
+ * is provided so diagnositics can be issued. We may remove precondition
+ * verification from the caller and move error handling here.
+ *
+ * Post condition:
+ * etc union holds Host numeric value.
+ * data.initial is NULL for error, else points to data.etc.
+ */
+
void
+cbl_field_t::encode_numeric( const char input[], cbl_loc_t loc,
+ const REAL_VALUE_TYPE& /*rvt*/ ) {
+ gcc_assert(input);
+ gcc_assert(is_numeric(this) || type == FldNumericEdited);
+
+ // The following are intended to test the preconditions....
+ if( type == FldLiteralN ) {
+ if( 0 < data.capacity() ) {
+ error_msg(loc, "unexpected nonzero numeric literal capacity");
+ }
+ if( data.initial != nullptr ) {
+ error_msg(loc, "unexpected initial value for numeric literal");
+ }
+ } else {
+ if( 0 == data.capacity() ) {
+ error_msg(loc, "unexpected zero capacity numeric nonliteral");
+ }
+ }
+
+ gcc_assert(0 < data.capacity() || type == FldLiteralN);
+ gcc_assert( data.initial == nullptr
+ || type == FldLiteralN
+ || data.capacity() <= strlen(data.initial)
+ || 1 < codeset.stride() );
+
+ if( type == FldFloat )
+ {
+ double d;
+ int n;
+ int erc = sscanf(input, "%lf%n", &d, &n);
+ if( erc < 0 || size_t(n) != strlen(input) )
+ {
+ dbgmsg("%s: error: could not interpret '%s' of '%s' as a number",
+ __func__, input + n, input);
+ gcc_assert(false);
+ }
+ REAL_VALUE_TYPE value;
+ real_from_string (&value, input);
+ value = real_value_truncate (TYPE_MODE (float128_type_node), value);
+ data = build_real(float128_type_node, value);
+ // Turn that back into a REAL_VALUE_TYPE with
+ // REAL_VALUE_TYPE readback_value = TREE_REAL_CST(data.etc.value);
+
+#define FOR_JIM 0
+#if FOR_JIM
+ {
+ // When you know data.etc.value was created with build_real()
+ enum tree_code code = TREE_CODE(TREE_TYPE(data.etc.value));
+ // code will be REAL_TYPE
+
+ REAL_VALUE_TYPE readback_value = TREE_REAL_CST(data.etc.value);
+ char ach[48];
+ size_t number_of_digits = 33;
+ bool crop_trailing_zeroes = true;
+ real_to_decimal(ach,
+ &readback_value,
+ sizeof(ach),
+ number_of_digits,
+ crop_trailing_zeroes);
+ fprintf(stderr, "FOR_JIM: %s real_value: %s\n", get_tree_code_name(code), ach);
+ }
+#endif
+
+ unsigned char *retval =
+ static_cast<unsigned char *>(xmalloc(data.capacity()));
+ assert(retval);
+ switch( data.capacity() )
+ {
+ case 4:
+ value = real_value_truncate (TYPE_MODE (float32_type_node), value);
+ native_encode_real(SCALAR_FLOAT_TYPE_MODE (float32_type_node), &value,
+ retval, 4, 0);
+ break;
+ case 8:
+ value = real_value_truncate (TYPE_MODE (float64_type_node), value);
+ native_encode_real(SCALAR_FLOAT_TYPE_MODE (float64_type_node), &value,
+ retval, 8, 0);
+ break;
+ case 16:
+ // 'value' is already a truncated float128
+ native_encode_real(SCALAR_FLOAT_TYPE_MODE (float128_type_node), &value,
+ retval, 16, 0);
+ break;
+ default:
+ gcc_assert(false);
+ break;
+ }
+ data.initial = reinterpret_cast<char *>(retval);
+ }
+ else
+ {
+ uint32_t l_capacity;
+ uint32_t l_digits;
+ int32_t l_rdigits;
+ uint64_t l_attr;
+ // The following returned capacity is 1, 2, 4, 8, or 16, for the binary
+ // value.
+ FIXED_WIDE_INT(128)value = dirty_to_binary(input,
+ l_capacity,
+ l_digits,
+ l_rdigits,
+ l_attr);
+ data = wide_int_to_tree(intTI_type_node, value);
+ // turn that back into a FIXED_WIDE_INT with
+ // wi::tree_to_wide_ref iii = wi::to_wide( data.etc.value );
+
+#if FOR_JIM
+ {
+ // When you know data.etc.value was created with wide_int_to_tree.
+ enum tree_code code = TREE_CODE(TREE_TYPE(data.etc.value));
+ // code will be INTEGER_TYPE
+
+ wi::tree_to_wide_ref iii = wi::to_wide( data.etc.value );
+ char ach[60];
+ print_dec(iii, ach, SIGNED);
+ fprintf(stderr, "FOR_JIM: %s fixed_value: %s\n", get_tree_code_name(code), ach);
+ }
+#endif
+
+ if( data.capacity() == 0 )
+ {
+ // It falls to us to establish these parameters:
+ data.capacity( l_capacity );
+ data.digits = l_digits;
+ data.rdigits = l_rdigits;
+ attr |= l_attr;
+ data.initial = static_cast<char *>(xmalloc(data.capacity()));
+ gcc_assert(data.initial);
+ }
+ else if( !(attr & quoted_e) )
+ {
+ // quoted_e at this point means numeric edited, which gets the initial
+ // value verbatim.
+ if( l_attr & signable_e && !(attr & signable_e) && value < 0)
+ {
+ if( type != FldNumericEdited || (data.picture && data.picture[0] != '-')) {
+ error_msg(loc, "%qs has unsigned PICTURE but signed VALUE %qs",
+ this->name, data.original());
+ }
+ }
+
+ if( data.digits && value != 0)
+ {
+ // We were supplied with parameters. We now make sure they are
+ // consistent.
+ if( attr & scaled_e )
+ {
+ if( data.rdigits > 0 )
+ {
+ // This is like PIC PPP9999, with digits=4 and rdigits=3
+ if( l_digits != static_cast<uint32_t>(l_rdigits) )
+ {
+ error_msg(loc, "The magnitude is too large");
+ }
+ else
+ {
+ // This is like PIC PP999, with digits=3 and rdigits=2
+ if( data.digits + data.rdigits < l_digits )
+ {
+ error_msg(loc, "Too many significant digits");
+ }
+ else
+ {
+ // We know the abs(value) is less than 1, and we know that the
+ // fractional part fits into (data.digits + data.rdigits) to
+ // the right of the decimal point. We need to make sure that
+ // the top rdigits of value are zero.
+
+ FIXED_WIDE_INT(128)tester = value;
+ if( tester < 0 )
+ {
+ tester = - tester;
+ }
+ // The final value will have data.digits + l_rdigits decimal
+ // places. Let's scale rvalue to that range, taking into
+ // account that we already have l_rdigits of those places.
+ tester *=
+ get_power_of_ten(data.digits + data.rdigits - l_rdigits);
+
+ // In the case of PPP9999, tester needs to be between 1 and
+ // 9999. data.digits is 4, so....
+ if( tester >= get_power_of_ten(data.digits) )
+ {
+ error_msg(loc, "The fractional part is too large");
+ }
+ }
+ }
+ }
+ else
+ {
+ // This is like PIC 999PP, with digits=3 and rdigits=-2
+ if( data.digits-data.rdigits < l_digits )
+ {
+ error_msg(loc, "Too many leading digits");
+ }
+ // We need to make sure the bottom -rdigits places are zero:
+ FIXED_WIDE_INT(128)v = value;
+ for(int32_t i=0; i < -data.rdigits; i++)
+ {
+ if( v % 10 != 0)
+ {
+ error_msg(loc, "P-scaled digits are nonzero");
+ break;
+ }
+ v = v / 10;
+ }
+ }
+ }
+ else if( !(attr & quoted_e) )
+ {
+ if( data.rdigits == 0 && l_rdigits > 0)
+ {
+ // This is a condition that the parser finds before we can:
+ }
+ if( data.rdigits && data.rdigits < l_rdigits )
+ {
+ // This is a condition that the parser finds before we can:
+ }
+ if( l_digits - l_rdigits > data.digits - data.rdigits )
+ {
+ error_msg(loc, "VALUE has too many integer digits");
+ }
+ }
+ }
+ }
+
+ char *retval;
+ if( data.initial )
+ {
+ retval = const_cast<char *>(data.initial);
+ }
+ else
+ {
+ retval = static_cast<char *>(xmalloc(data.capacity()));
+ data.initial = retval;
+ }
+
+ switch(type)
+ {
+ case FldNumericBin5:
+ case FldLiteralN:
+ {
+ binary_initial(retval, this, value, l_rdigits);
+ break;
+ }
+ case FldNumericBinary:
+ {
+ binary_initial(retval, this, value, l_rdigits);
+ if( attr & big_endian_e )
+ {
+ // This is a big-endian value, so swap retval end-for-end:
+ size_t left = 0;
+ size_t right = data.capacity() - 1;
+ while(left < right)
+ {
+ std::swap(retval[left++], retval[right--]);
+ }
+ }
+ break;
+ }
+ case FldPacked:
+ {
+ char *pretval = retval;
+ char ach[128];
+
+ bool negative;
+ if( value < 0 )
+ {
+ negative = true;
+ value = -value;
+ }
+ else
+ {
+ negative = false;
+ }
+
+ // For COMP-6 (flagged by packed_no_sign_e), the number of required
+ // digits is twice the capacity.
+
+ // For COMP-3, the number of digits is 2*capacity minus 1, because the
+ // the final "digit" is a sign nybble.
+
+ size_t ndigits = (attr & packed_no_sign_e)
+ ? data.capacity() * 2
+ : data.capacity() * 2 - 1;
+
+ digits_from_int128(ach, this, ndigits, value, l_rdigits);
+
+ const char *digits = ach;
+ for(size_t i=0; i<ndigits; i++)
+ {
+ if( !(i & 0x01) )
+ {
+ *pretval = ((*digits++) & 0x0F)<<4;;
+ }
+ else
+ {
+ *pretval++ += (*digits++) & 0x0F;
+ }
+ }
+ if( !(attr & packed_no_sign_e) )
+ {
+ // This is COMP-3, so put in a sign nybble
+ if( attr & signable_e )
+ {
+ if( negative )
+ {
+ *pretval++ += 0x0D; // Means signable and negative
+ }
+ else
+ {
+ *pretval++ += 0x0C; // Means signable and non-negative
+ }
+ }
+ else
+ {
+ *pretval++ += 0x0F; // Means not signable
+ }
+ }
+ break;
+ }
+
+ case FldNumericDisplay:
+ {
+ // We are going to take the numerical value and convert it to the form
+ // specified by the attributes, digits, and rdigits.
+
+ char *pretval = retval;
+ char ach[128];
+
+ bool negative;
+ if( value < 0 )
+ {
+ negative = true;
+ value = - value;
+ }
+ else
+ {
+ negative = false;
+ }
+ digits_from_int128(ach, this, data.digits, value, l_rdigits);
+ const char *digits = ach;
+ if( (attr & signable_e)
+ && (attr & separate_e)
+ && (attr & leading_e ) )
+ {
+ // This zoned decimal value is signable, separate, and leading.
+ if( negative )
+ {
+ *pretval++ = ascii_minus;
+ }
+ else
+ {
+ *pretval++ = ascii_plus;
+ }
+ }
+ for(size_t i=0; i<data.digits; i++)
+ {
+ // Start by assuming it's an value that can't be signed
+ *pretval++ = ascii_0 + ((*digits++) & 0x0F);
+ }
+ if( (attr & signable_e)
+ && (attr & separate_e)
+ && !(attr & leading_e ) )
+ {
+ // The value is signable, separate, and trailing
+ if( negative )
+ {
+ *pretval++ = ascii_minus;
+ }
+ else
+ {
+ *pretval++ = ascii_plus;
+ }
+ }
+
+ // It's at this point we convert to the target encoding:
+ charmap_t *charmap = __gg__get_charmap(codeset.encoding);
+ size_t retval_length = pretval - retval;
+ if( retval_length != char_capacity() ) {
+ cbl_errx( "%s: %s %lu %s %lu",
+ name,
+ "retval_length",
+ (unsigned long)retval_length,
+ "!= char_capacity()",
+ (unsigned long)char_capacity());
+ }
+ gcc_assert(retval_length == char_capacity());
+ size_t nbytes;
+ const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+ codeset.encoding,
+ retval,
+ retval_length,
+ &nbytes);
+ if( nbytes != data.capacity() ) {
+ cbl_errx( "%s: nbytes %lu %s %lu",
+ name,
+ (unsigned long)nbytes,
+ "!= data.capacity()",
+ (unsigned long)data.capacity());
+ }
+ gcc_assert(nbytes == data.capacity());
+ memcpy(retval, converted, data.capacity());
+ if( (attr & signable_e)
+ && !(attr & separate_e) )
+ {
+ // This value is signable, and not separate. So, the sign
+ // information goes into the first or last byte:
+ char *sign_location = attr & leading_e
+ ? retval
+ : retval + (data.digits-1) * charmap->stride() ;
+ cbl_char_t schar = charmap->set_digit_negative(*sign_location,
+ negative);
+ memcpy(sign_location, &schar, charmap->stride());
+ }
+ break;
+ }
+
+ case FldNumericEdited:
+ {
+ if( attr & quoted_e )
+ {
+ // What the programmer says the value is, the value stays, no
+ // matter how weird it might be.
+ }
+ else
+ {
+ // It's not a quoted string, so we use data.value:
+ bool negative;
+ if( value < 0 )
+ {
+ negative = true;
+ value = -value;
+ }
+ else
+ {
+ negative = false;
+ }
+
+ char ach[128];
+ memset(ach, 0, sizeof(ach));
+ memset(retval, 0, data.capacity());
+
+ if( (attr & blank_zero_e) && value == 0 )
+ {
+ memset( retval,
+ ascii_space,
+ data.capacity());
+ }
+ else
+ {
+ digits_from_int128(ach, this, char_capacity(), value, l_rdigits);
+
+ // __gg__string_to_numeric_edited operates in ASCII space:
+ __gg__string_to_numeric_edited( reinterpret_cast<char *>(retval),
+ ach,
+ data.rdigits,
+ negative,
+ data.picture);
+ // So now we convert it to the target encoding:
+ size_t nbytes;
+ const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+ codeset.encoding,
+ retval,
+ char_capacity(),
+ &nbytes);
+ memcpy(retval, converted, nbytes);
+ }
+ }
+ break;
+ }
+
+ default:
+ cbl_errx( "%s:%d: type %s, who woulda thunk?",
+ __func__, __LINE__, cbl_field_type_str(type) );
+ gcc_assert(false);
+ break;
+ }
+ }
+ gcc_assert(data.etc_type != cbl_field_data_t::no_value_e);
+}
+
+size_t parse_error_inc();
+size_t parse_error_count();
+
+bool // true if error reported
cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
- if( ! data.initial ) return;
+ if( ! data.original() ) return false;
- auto fig = cbl_figconst_of(data.initial);
+ const auto nerr = parse_error_count();
- // numeric initial value
+ auto orig = data.original();
+
+ auto fig = cbl_figconst_of(orig);
+
+ // numeric orig value
if( is_numeric(type) ) {
if( has_attr(quoted_e) ) {
error_msg(loc, "numeric type %s VALUE '%s' requires numeric VALUE",
- name, data.initial);
- return;
+ name, orig);
+ return true;
}
if( ! (fig == normal_value_e || fig == zero_value_e) ) {
error_msg(loc, "numeric type %s VALUE '%s' requires numeric VALUE",
name, cbl_figconst_str(fig));
- return;
+ return true;
}
switch( type ) {
// We are dealing with a pure binary type. If the capacity is
// 8 or more, we need do no further testing because we assume
// everything fits.
- if( data.capacity < 8 ) {
- const char *p = strchr(data.initial, symbol_decimal_point());
+ if( data.capacity() < 8 ) {
+ const char *p = strchr(orig, symbol_decimal_point());
if( p && atoll(p+1) != 0 ) {
error_msg(loc, "integer type %s VALUE '%s' "
"requires integer VALUE",
- name, data.initial);
+ name, orig);
} else {
// Calculate the maximum possible value that a binary with this
// many bytes can hold
size_t max_possible_value;
max_possible_value = 1;
- max_possible_value <<= data.capacity*8;
+ max_possible_value <<= data.capacity()*8;
max_possible_value -= 1;
if( attr & signable_e )
{
}
// Pick up the given VALUE
size_t candidate;
- if( *data.initial == '-' ) {
+ if( *orig == '-' ) {
// We care about the magnitude, not the sign
if( !(attr & signable_e) ){
error_msg(loc, "integer type %s VALUE '%s' "
"requires a non-negative integer",
- name, data.initial);
+ name, orig);
}
- candidate = atoll(data.initial+1);
+ candidate = atoll(orig+1);
}
else {
- candidate = (size_t)atoll(data.initial);
+ candidate = (size_t)atoll(orig);
}
if( candidate > max_possible_value ) {
error_msg(loc, "integer type %s VALUE '%s' "
"requires an integer of magnitude no greater than %zu",
- name, data.initial, max_possible_value);
+ name, orig, max_possible_value);
}
}
}
/*
* Check fraction for excess precision
*/
- const char *p = strchr(data.initial, symbol_decimal_point());
+ const char *p = strchr(orig, symbol_decimal_point());
if( p ) {
auto pend = std::find(p, p + strlen(p), 0x20);
int n = std::count_if( ++p, pend, isdigit );
if( data.precision() < n) {
if( 0 == data.rdigits ) {
error_msg(loc, "integer type %s VALUE '%s' requires integer VALUE",
- name, data.initial);
+ name, orig);
} else {
auto has_exponent = std::any_of( p, pend,
[]( char ch ) {
} );
if( !has_exponent && data.precision() < pend - p ) {
error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%ld)",
- name, data.initial, '.', (long)(pend - p));
+ name, orig, '.', (long)(pend - p));
}
}
}
} else {
- p = data.initial + strlen(data.initial);
+ p = orig + strlen(orig);
}
/*
* Check magnitude, whether or not there's a decimal point.
*/
// skip leading zeros
- auto first_digit = std::find_if( data.initial, p,
+ auto first_digit = std::find_if( orig, p,
[]( char ch ) {
return ch != '0'; } );
// count remaining digits, up to the decimal point
auto n = std::count_if( first_digit, p, isdigit );
if( data.ldigits() < n ) {
error_msg(loc, "numeric %s VALUE '%s' holds only %u digits",
- name, data.initial,
+ name, orig,
data.digits);
}
}
break;
- } // end type switch for normal string initial value
- return;
+ } // end type switch for normal string orig value
+ return nerr < parse_error_count();
} // end numeric
assert( ! is_numeric(type) );
// consider all-alphabetic
if( has_attr(all_alpha_e) ) {
- bool alpha_value = fig != zero_value_e;
+ bool is_alpha_only = fig != zero_value_e;
+
+ if( fig == normal_value_e && ! has_attr(hex_encoded_e)) {
+ // Test the input, not the converted initial value
+ is_alpha_only = std::none_of( orig, orig + strlen(orig),
+ []( char ch ) {
+ return
+ ISPUNCT(ch) ||
+ ISDIGIT(ch); } );
+ }
+ /*
+ * This is overspecific: It catches numeric literal VALUE for all_alpha_e\
+ * only.
+ * The general error is:
+ * - alphanumeric type
+ * - data.initial is all spaces (based on PICTURE)
+ * - data.original() is numeric or data.etc_type == value_e
+ * - quoted_e clear, of course
+ *
+ * This happens because VALUE was captured as a cce and stored in
+ * data.original for encode_numeric. But encode_numeric was never called
+ * because it's not a numeric field.
+ *
+ * It is also insufficient. It does not deal with VALUE LENGTH OF.
+ */
+ if( is_alpha_only ) {
+ charmap_t *charmap = __gg__get_charmap(codeset.encoding);
+ auto spc = charmap->mapped_character(ascii_space);
+ bool spacey = std::all_of( data.initial,
+ data.initial + char_capacity(),
+ [spc]( char ch ) { return static_cast<cbl_char_t>(ch) == spc; } );
+ if( spacey ) {
+ if( ISDIGIT(orig[0]) || orig[0] == '-' || orig[0] == '+' ) {
+ gcc_assert( ! has_attr(quoted_e) );
+ is_alpha_only = false; // alpha field supplied with VALUE numeric
+ }
+ }
+ }
- if( fig == normal_value_e ) {
- alpha_value = std::none_of( data.initial,
- data.initial +
- data.capacity,
- []( char ch ) {
- return
- ISPUNCT(ch) ||
- ISDIGIT(ch); } );
- }
- if( ! alpha_value ) {
+ if( ! is_alpha_only ) {
error_msg(loc, "alpha-only %s VALUE '%s' contains non-alphabetic data",
- name, fig == zero_value_e? cbl_figconst_str(fig) : data.initial);
+ name, fig == zero_value_e? cbl_figconst_str(fig) : orig);
+
+ auto pend = orig + strlen(orig);
+ auto p = std::find_if( orig, pend,
+ []( char ch ) { return ! ISALPHA(ch); } );
+ dbgmsg("%zu nonalpha '%.*s'", pend - p, int(pend - p), p);
}
}
- return;
+ return nerr < parse_error_count();
}
// Return the field representing the subscript whose literal value
case 0:
if( src->type == FldLiteralA && is_numericish(tgt) && !is_literal(tgt) ) {
// Allow if input string is an integer.
- size_t outcount;
- char *in_ascii = static_cast<char *>(xmalloc(4 * src->data.capacity));
- const char *in_asciip = __gg__iconverter( src->codeset.encoding,
- DEFAULT_SOURCE_ENCODING,
- src->data.initial,
- src->data.capacity,
- &outcount );
- memcpy(in_ascii, in_asciip, outcount);
- const char *p = in_ascii, *pend = p + src->data.capacity;
+ const char *p = src->data.original(), *pend = p + strlen(src->data.original());
if( (p[0] == ascii_plus) || (p[0] == ascii_minus) ) p++;
retval = std::all_of( p, pend, isdigit );
if( yydebug && ! retval ) {
HOST_SIZE_T_PRINT_UNSIGNED,
__func__, __LINE__, *bad, (fmt_size_t)(bad - p));
}
- free(in_ascii);
}
break;
case 1:
}
if( retval && src->has_attr(embiggened_e) ) {
- if( is_numeric(tgt) && tgt->data.capacity < src->data.capacity ) {
+ if( is_numeric(tgt) && tgt->data.capacity() < src->data.capacity() ) {
dbgmsg("error: source no longer fits in target");
return false;
}
auto psize = std::find_if( sizes, esizes,
[digits]( sizes_t sizes ) {
- return sizes.bounds.first <= digits && digits <= sizes.bounds.second;
+ return sizes.bounds.first <= digits
+ && digits <= sizes.bounds.second;
} );
if( psize != esizes ) return psize->size;
linemap_add(line_table, LC_LEAVE, sysp, NULL, 0);
}
-size_t
+uint64_t
symbol_unique_index( const struct symbol_elem_t *e ) {
assert(e);
- size_t usym = symbol_index(e);
-#if READY_FOR_INODE
+ uint64_t usym = symbol_index(e);
if( ! input_filenames.empty() ) {
- size_t inode = input_filenames.top().inode;
- usym = usym ^ inode;
+ uint64_t inode = input_filenames.top().inode;
+ static const int half_bits = sizeof(uint64_t)*4;
+ usym ^= inode>>half_bits;
+ usym ^= inode<<half_bits;
}
-#endif
return usym;
}
#endif
static const diagnostics::option_id option_zero;
-size_t parse_error_inc();
void gcc_location_dump() {
linemap_dump_location( line_table, token_location, stderr );
msg = xasprintf("%s [%s]", gmsgid, option);
gmsgid = msg;
}
-
+
va_list ap;
va_start(ap, gmsgid);
*> { dg-do run }
- *> { dg-options "-finternal-ebcdic" }
+ *> { dg-options "-fexec-charset=cp1140" }
*> { dg-output-file "group2/CHAR_and_ORD_with_COLLATING_sequence_-_EBCDIC.out" }
IDENTIFICATION DIVISION.
PROGRAM-ID. prog.
DATA DIVISION.
WORKING-STORAGE SECTION.
01 FILLER.
- 02 ADATA VALUE "654321".
+ 02 ADATA PIC X(6) VALUE "654321".
02 A REDEFINES ADATA PIC 9 OCCURS 6 TIMES.
02 B PIC 9.
- 02 CDATA VALUE "999999".
+ 02 CDATA PIC X(6) VALUE "999999".
02 C REDEFINES CDATA PIC 9 OCCURS 6 TIMES.
01 TEMP PIC 9.
PROCEDURE DIVISION.
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-555.10 555.10 555.10 555.10 555.0999756 555.100000000000023 555.1000000000000000000000000000000197
-
+555.10 555.10 555.10 555.10 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.10 555.10 555.10 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.10 555.10 555.09 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.10 555.09 555.09 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.10 555.09 555.09 555.10 555.0999756 555.099999999999909 555.0999999999999999999999999999999211
+555.09 555.09 555.10 555.10 555.0999756 555.099999999999909 555.1000030517578124999999999999999606
+555.09 555.10 555.10 555.10 555.0999756 555.100003051757767 555.0999999999999971578290569595992171
\ No newline at end of file
-555
-555.55
--5.5555E+208
+-555.55e206
555
555.55
-5.5555E+208
+555.55e206
333.3300 333.3300 333.3300 333.3300 3.333300083E+22 3.33329999999999994E+102 3.333300000000000000000000000000000168E+202
-555.5500 555.5500 555.5500 555.5500 5.555499988E+22 5.55549999999999973E+102 5.555500000000000000000000000000000029E+202
-
+555.5500 555.5500 555.5500 555.5500 5.555499988E+22 5.55549999999999973E+102 5.555500000000000000000000000000000029E+202
\ No newline at end of file
#include <algorithm>
#include <unordered_map>
#include <vector>
+#include <langinfo.h>
#include "ec.h"
#include "common-defs.h"
int __gg__quote_character = '"' ;
int __gg__low_value_character = 0x00 ;
int __gg__high_value_character = 0xFF ;
+cbl_char_t __gg__working_init = NOT_A_CHARACTER;
+cbl_char_t __gg__local_init = NOT_A_CHARACTER;
+uint32_t __gg__wsclear = NOT_A_CHARACTER;
std::vector<std::string> __gg__currency_signs(256) ;
int __gg__default_currency_sign;
char *__gg__ct_currency_signs[256]; // Compile-time currency signs
+cbl_encoding_t __gg__console_encoding = no_encoding_e ;
cbl_encoding_t __gg__display_encoding = no_encoding_e;
cbl_encoding_t __gg__national_encoding = no_encoding_e;
{ false, iconv_YU_e, "YU" },
};
+/*
+ * Because this variable is static, the contructor runs before main and is
+ * guaranted to run.
+ */
+static class rt_encoding_t
+ {
+ const char *ctype, *lc_ctype;
+ public:
+ rt_encoding_t() : ctype( setlocale(LC_CTYPE, "") )
+ {
+ lc_ctype = nl_langinfo(CODESET);
+ // Let's learn what the computer is using for the console:
+ // We need to establish the codeset used by the system console:
+ __gg__console_encoding = use_locale();
+ }
+ cbl_encoding_t use_locale() const
+ {
+ auto encoding = strstr(ctype, "UTF-8") ?
+ iconv_UTF_8_e : __gg__encoding_iconv_type(lc_ctype);
+ return encoding;
+ }
+ } rt_encoding;
+
static const encodings_t *
encoding_descr( cbl_encoding_t encoding ) {
static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
return p < eoencodings? p : nullptr;
}
-const char *
-__gg__encoding_iconv_name( cbl_encoding_t encoding ) {
- auto p = encoding_descr(encoding);
- return p? p->name : nullptr;
-}
-
-bool
-__gg__encoding_iconv_valid( cbl_encoding_t encoding ) {
- auto p = encoding_descr(encoding);
- return p? p->supported : false;
-}
-
-cbl_encoding_t
-__gg__encoding_iconv_type( const char *name ) {
+static const encodings_t *
+encoding_descr( const char name[] ) {
static encodings_t *eoencodings = encodings + COUNT_OF(encodings);
char *slashless = strdup(name);
} );
free(slashless);
- return p < eoencodings? p->type : no_encoding_e;
+ return p < eoencodings? p : nullptr;
+}
+
+const encodings_t *
+__gg__encoding_iconv_descr( const char name[] ) {
+ return encoding_descr(name);
+}
+
+const encodings_t *
+__gg__encoding_iconv_descr( cbl_encoding_t encoding ) {
+ return encoding_descr(encoding);
+}
+
+const char *
+__gg__encoding_iconv_name( cbl_encoding_t encoding ) {
+ auto p = encoding_descr(encoding);
+ return p? p->name : nullptr;
+}
+
+bool
+__gg__encoding_iconv_valid( cbl_encoding_t encoding ) {
+ auto p = encoding_descr(encoding);
+ return p? p->supported : false;
+}
+
+cbl_encoding_t
+__gg__encoding_iconv_type( const char *name ) {
+ auto p = encoding_descr(name);
+ return p? p->type : no_encoding_e;
}
char *
__gg__iconverter( cbl_encoding_t from,
cbl_encoding_t to,
- const char *str,
+ const void *str_,
size_t length,
- size_t *outlength)
+ size_t *outlength_p,
+ size_t *iconv_retval_p )
{
+ const char *str = static_cast<const char *>(str_);
+
+ // Attempts to convert 'length' bytes 'str' in 'from' encoding to
+ // the 'to' encoding.
+
+ // The return value points to a static memory area in this function, the
+ // caller has to respect that and make copies before doing something that
+ // will call this routine again. Note that __gg__get_charmap, and
+ // charmap_t::mapped_character can call this routine.
+
+ // The routine optionally returns the number of bytes generated, the number
+ // of bytes eaten by iconv, and the actual return value from the iconv call.
+
+ // Let's consider the possibility of each input character needing four output
+ // characters. We increase it by one to leave room for the terminating NUL,
+ // which itself might be four bytes of 0x00. The static area keeps growing
+ // as necessary.
+
+ // Get charmap first, because we might need it in the event of a conversion
+ // error, and we have to avoid problems with recursion clobbering the return
+ // buffer, because __gg__get_charmap can call us:
+ charmap_t *charmap_to = __gg__get_charmap(to);
+
static size_t retsize = 1;
static char *retval = static_cast<char *>(malloc(retsize));
- // Let's consider the possibility of each input character needed four output
- // characters:
- size_t needed = 4*length;
+ size_t needed = 4*(length+1);
if( retsize < needed )
{
retsize = needed;
retval = static_cast<char *>(realloc(retval, retsize));
}
+ size_t outlength;
+ size_t iconv_retval;
+
if( from == to )
{
+ // There is no need to actually convert. Simulate a successful iconv()
+ // call:
+
memcpy(retval, str, length);
- *outlength = length;
+ outlength = length;
+ iconv_retval = 0;
}
else
{
- // Converts the given string from from to to using iconv.
-
- // The return value points to a static memory area in this function, the
- // caller has to respect that.
-
// We attempt to minimize overhead by using a map to call
// iconv_open but once for each from/to pairing.
char *inbuf = const_cast<char *>(str);
char *outbuf = retval;
- size_t incount = length;
- size_t outcount = retsize;
- *outlength = iconv( cd,
- &inbuf, &incount,
- &outbuf, &outcount);
- *outlength = retsize - outcount;
-
- if( *outlength == length )
+ size_t inbytesleft = length;
+ size_t outbytesleft = retsize;
+
+ /* It's time for some COBOL magic. The default HIGH-VALUE in COBOL is
+ 0xFF. CP1252, UTF-16, and UTF32 all happily interconvert 0xFF, 0x00FF,
+ and 0x000000FF. But CP1140 is a pain.
+
+ A CP1252 0xFF becomes a CP1140 DF, which converts back to 0xFF
+ CP1140 DF becomes FF, 00ff and 000000FF.
+
+ So, we need to intervene when the source, or dest, is ebcdic. */
+
+ char *inbuf_cpy = nullptr;
+ if( __gg__high_value_character == DEGENERATE_HIGH_VALUE )
+ {
+ const charmap_t *map_from = __gg__get_charmap(from);
+ if( map_from->is_like_ebcdic() )
+ {
+ inbuf_cpy = static_cast<char *>(malloc(length));
+ assert(inbuf_cpy);
+ memcpy(inbuf_cpy, inbuf, length);
+ inbuf = inbuf_cpy;
+ for(size_t i=0; i<length; i++)
+ {
+ if( (unsigned char)inbuf[i] == (unsigned char)0xFF )
+ {
+ inbuf[i] = (char)0xDF;
+ }
+ }
+ }
+ }
+
+ // When the caller supplies iconv_retval_p, we only try to convert once,
+ // because they are telling us they will handle errors.
+
+ // Otherwise, we just keep trying to convert, replacing unconvertable
+ // characters with a replacement.
+
+ iconv_retval = 1; // This primes the pump:
+ for(;;)
{
- /* In a kind of shortsighted way, we are going to assume
- single-byte-coding, and we are going to cope here with the
- COBOL-world reality of HIGH-VALUE being, by default, the value 0xFF.
- This is required by IBM in the EBCDIC and ASCII worlds. The
- implications for other locales are being left for another time.
-
- So, for now, we are regarding 0xFF as invariant. Thus, at this
- point, we have to scan the input and make sure the output has 0xFF
- where the input does. */
- for(size_t i=0; i<length; i++)
+ iconv_retval = iconv( cd,
+ &inbuf, &inbytesleft,
+ &outbuf, &outbytesleft);
+ if( iconv_retval_p || iconv_retval == 0 )
{
- if( static_cast<unsigned char>(str[i]) == 0xFF )
+ // Either there was no conversion error, or else our caller wants
+ // to know about the error
+ break;
+ }
+ // Arriving here means that there has been a conversion error.
+ if( charmap_to->stride() >= 2 )
+ {
+ // Put in the value for the U+FFFD Replacement Character
+ charmap_to->putch(REPLACEMENT_CHARACTER, outbuf, size_t(0));
+ outbuf += charmap_to->stride();
+ outbytesleft -= charmap_to->stride();
+ }
+ else if( charmap_to->is_like_utf8() )
+ {
+ // Put in the UTF-8 bytes for the U+FFFD Replacement Character
+ *outbuf++ = static_cast<char>(0xEF);
+ *outbuf++ = static_cast<char>(0xBF);
+ *outbuf++ = static_cast<char>(0xBD);
+ outbytesleft -= 3;
+ }
+ else
+ {
+ // This is some kind of single-byte-coded character set. We just use
+ // a question mark as the replacement character.
+ *outbuf++ = charmap_to->mapped_character(ascii_query);
+ outbytesleft -= 1;
+ }
+ // skip past the byte that caused the conversion error:
+ inbuf += 1;
+ inbytesleft -= 1;
+ // Raise the run-time error:
+#ifdef IN_TARGET_LIBS
+ exception_raise(ec_data_conversion_e);
+ // And then loop around and try it again.
+#endif
+ }
+
+ free(inbuf_cpy);
+ // Calculate the number of bytes generated:
+ outlength = retsize - outbytesleft;
+
+ if( __gg__high_value_character == DEGENERATE_HIGH_VALUE )
+ {
+ const charmap_t *map_to = __gg__get_charmap(to);
+ if( map_to->is_like_ebcdic() )
+ {
+ for(size_t i=0; i<length; i++)
{
- retval[i] = static_cast<char>(0xFF);
+ if( (unsigned char)retval[i] == (unsigned char)0xDF )
+ {
+ retval[i] = (char)0xFF;
+ }
}
}
}
}
// For the convenience of those who call this routine, we are sticking a
- // terminating NUL on the end of the generated string
- retval[*outlength] = '\0';
+ // terminating NUL on the end of the generated string. Keeping in mind that
+ // a NUL isn't always a single byte, we are going to lay down four of them.
+ retval[outlength+0] = '\0';
+ retval[outlength+1] = '\0';
+ retval[outlength+2] = '\0';
+ retval[outlength+3] = '\0';
+
+ if( outlength_p )
+ {
+ *outlength_p = outlength;
+ }
+ if( iconv_retval_p )
+ {
+ *iconv_retval_p = iconv_retval;
+ }
return retval;
}
+char *
+__gg__miconverter( cbl_encoding_t from,
+ cbl_encoding_t to,
+ const void *str_,
+ size_t length,
+ size_t *outlength_p,
+ size_t *iconv_retval_p )
+ {
+ const char *converted = __gg__iconverter(from,
+ to,
+ str_,
+ length,
+ outlength_p,
+ iconv_retval_p);
+ char *retval = static_cast<char *>(malloc(*outlength_p + 4));
+ assert(retval);
+ memcpy(retval, converted, *outlength_p);
+ // Tack on four zeros to be a NUL in any encoding.
+ memset(retval + *outlength_p, 0, 4);
+ return retval;
+ }
+
static
std::unordered_map<cbl_encoding_t, charmap_t *>map_of_encodings;
#include <vector>
#include <unistd.h>
+#include <limits.h>
+#include <iconv.h>
/* There are four distinct codeset domains in the COBOL compiler.
*
Stay alert! */
+typedef uint32_t cbl_char_t;
+#define NOT_A_CHARACTER (0xbadbeef)
+
extern int __gg__decimal_point ;
extern int __gg__decimal_separator ;
extern int __gg__quote_character ;
extern int __gg__default_currency_sign;
extern cbl_encoding_t __gg__display_encoding ;
extern cbl_encoding_t __gg__national_encoding ;
+extern cbl_char_t __gg__working_init;
+extern cbl_char_t __gg__local_init;
+extern uint32_t __gg__wsclear;
+
+enum
+ {
+ /* HIGH-VALUE is an endless source of irritation.
+
+ 0xFF is the default value for COBOL since time immemorial. Its use that
+ way long predates the existence of code pages. 0xFF is a valid character
+ in many code pages, which make a muddle of the original intent of a
+ default value of 0xFF for high-value.
+
+ We want older programs to continue to work. And we want to use 0xFF for
+ ascii and ebcdic, and it turns out that 0xFFFF works for UTF-16; it is
+ specifically designed in UNICODE as a well-formed non-character.
+
+ 0xFFFFFFFF, however, is not readily usable in UTF-32. It is not well-
+ formed, and it is not a character. Technically, the largest value in
+ UTF-32 is the largest UNICODE code point, which is 0x10FFFF. It's
+ tempting to use that value as the UTF32 HIGH-VALUE, except that it doesn't
+ map into a single 16-bit value in UTF-16 (it takes a pair of 16-bit
+ values), and it doesn't map into anything sensible in ASCII or EBCDIC, and
+ it takes multiple bytes in UTF-8.
+
+ So, we are going to work with the following observations:
+
+ 0xFF in CP1252 <==> 0x000000FF in UTF32
+ 0xFF in CP1140 <==> 0x0000009F in UTF32
+ 0xFFFF in UTF-16 <==> 0x0000FFFF in UTF32
+
+ Be it hereby acknowledged that not all possibilities for encoding inter-
+ conversion have been explored, and we anticipate finding and eliminating
+ HIGH-VALUE problems will be Whac-A-Mole territory for some time to come.
+
+ Please use these constants for that kind of work, because otherwise
+ finding anomalies will be even more frustrating than I currently
+ anticipate. Dubner, 2025-11-24 */
+ DEFAULT_HIGH_VALUE_8 = 0xFF,
+ DEFAULT_HIGH_VALUE_16 = 0x00FF,
+ DEFAULT_HIGH_VALUE_32 = 0x000000FF,
+
+ /* These values are used as figurative constants when interconverting from
+ and encoding to UTF32. Examine, for example, the implementation for
+ the INSPECT statement: */
+ ASCII_HIGH_VALUE_32 = 0x000000FF,
+ EBCDIC_HIGH_VALUE_32 = 0x000000FF,
+ UTF16_HIGH_VALUE_32 = 0x000000FF,
+ UTF32_HIGH_VALUE_32 = 0x000000FF,
+
+ REPLACEMENT_CHARACTER = 0xFFFD,
+ };
#define NULLCH ('\0')
#define DEGENERATE_HIGH_VALUE 0xFF
#define DEGENERATE_LOW_VALUE 0x00
+#define ascii_nul ((uint8_t)('\0'))
#define ascii_A ((uint8_t)('A'))
#define ascii_B ((uint8_t)('B'))
#define ascii_C ((uint8_t)('C'))
#define ascii_colon ((uint8_t)(':'))
#define ascii_comma ((uint8_t)(','))
#define ascii_dollar_sign ((uint8_t)('$'))
+#define ascii_bang ((uint8_t)('!'))
#define ascii_dquote ((uint8_t)('"'))
#define ascii_oparen ((uint8_t)('('))
#define ascii_caret ((uint8_t)('^'))
const char * __gg__encoding_iconv_name( cbl_encoding_t encoding );
cbl_encoding_t __gg__encoding_iconv_type( const char *name );
+extern cbl_encoding_t __gg__console_encoding;
+// returns a pointer to a static buffer. Beware!
char * __gg__iconverter(cbl_encoding_t from,
cbl_encoding_t to,
- const char *str,
+ const void *str,
size_t length,
- size_t *outlength);
+ size_t *outlength = nullptr, // Bytes produced
+ size_t *iconv_retval = nullptr);
+
+// returns a malloced buffer. Remember to free it.
+char * __gg__miconverter(cbl_encoding_t from,
+ cbl_encoding_t to,
+ const void *str,
+ size_t length,
+ size_t *outlength = nullptr, // Bytes produced
+ size_t *iconv_retval = nullptr);
+
#define DEFAULT_SOURCE_ENCODING (iconv_CP1252_e)
+#define DEFAULT_32_ENCODING (iconv_UTF32LE_e)
+
+class charmap_t;
+
+charmap_t *__gg__get_charmap(cbl_encoding_t encoding);
class charmap_t
{
bool m_is_valid;
bool m_is_big_endian;
bool m_has_bom = false;
- int m_stride; // Number of bytes between one character and the next
+ bool m_is_like_utf8;
+ uint8_t m_stride; // Number of bytes between one character and the next
enum
{
// This map retains the ASCII-to-encoded value in m_encoding, so that iconv
// need be called but once for each ASCII value.
- std::unordered_map<int, int>m_map_of_encodings;
+ std::unordered_map<cbl_char_t, cbl_char_t>m_map_of_encodings;
public:
explicit charmap_t(cbl_encoding_t e) : m_encoding(e)
// what we get back.
size_t outlength = 0;
- const char challenge[] = "0";
- const unsigned char *response = PTRCAST(unsigned char,
- __gg__iconverter(DEFAULT_SOURCE_ENCODING,
- m_encoding,
- challenge,
- 1,
- &outlength));
+ char challenge[] = "0";
+ char response_[8];
+
+ iconv_t cd = iconv_open(
+ __gg__encoding_iconv_name(m_encoding),
+ __gg__encoding_iconv_name(DEFAULT_SOURCE_ENCODING));
+ char *inbuf = challenge;
+ char *outbuf = response_;
+ size_t inbytesleft = 1;
+ size_t outbytesleft = sizeof(response_);
+ /*size_t nret = */ iconv( cd,
+ &inbuf, &inbytesleft,
+ &outbuf, &outbytesleft);
+ outlength = sizeof(response_) - outbytesleft;
+ iconv_close(cd);
+
+ const unsigned char *response =
+ reinterpret_cast<unsigned char *>(response_);
+
unsigned char char_0 = 0x00;
m_is_valid = false;
m_has_bom = false;
m_is_big_endian = false;
+ m_is_like_utf8 = false;
if( outlength == 1 )
{
m_is_valid = true;
m_numeric_sign_type = sign_type_ebcdic;
}
+
+ // Let's see if this encoding is UTF-8. We will do that by converting
+ // the single-byte CP1252 code for the Euro symbol to our encoding.
+ cd = iconv_open(
+ __gg__encoding_iconv_name(iconv_CP1252_e),
+ __gg__encoding_iconv_name(m_encoding));
+ challenge[0] = static_cast<char>(0x80);// This is the CP1252 Euro symbol.
+ inbuf = challenge;
+ outbuf = response_;
+ inbytesleft = 1;
+ outbytesleft = sizeof(response_);
+ iconv(cd,
+ &inbuf, &inbytesleft,
+ &outbuf, &outbytesleft);
+ outlength = sizeof(response_) - outbytesleft;
+ iconv_close(cd);
+ m_is_like_utf8 = (outlength == 3);
}
- bool is_valid() const{return m_is_valid ;}
- bool is_big_endian() const{return m_is_big_endian;}
- bool has_bom() const{return m_has_bom ;}
- int stride() const{return m_stride ;}
+ bool is_valid() const { return m_is_valid ; }
+ bool is_big_endian() const { return m_is_big_endian; }
+ bool has_bom() const { return m_has_bom ; }
+ uint8_t stride() const { return m_stride ; }
- int mapped_character(int ch)
+ cbl_char_t mapped_character(cbl_char_t ch)
{
// The assumption is that anybody calling this routine is providing
// a single-byte character in the DEFAULT_SOURCE_ENCODING encoding. We
// return the equivalent character in the m_encoding
- int retval;
- std::unordered_map<int, int>::const_iterator it =
+ cbl_char_t retval;
+ std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator it =
m_map_of_encodings.find(ch);
if( it != m_map_of_encodings.end() )
{
{
return mapped_character(__gg__low_value_character);
}
- int high_value_character()
+ cbl_char_t high_value_character()
{
- return mapped_character(__gg__high_value_character);
+ cbl_char_t retval = 0;
+ if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
+ {
+ switch(m_stride)
+ {
+ case 1:
+ retval = DEFAULT_HIGH_VALUE_8;
+ break;
+ case 2:
+ retval = DEFAULT_HIGH_VALUE_16;
+ break;
+ case 4:
+ retval = DEFAULT_HIGH_VALUE_32 ;
+ break;
+ }
+ }
+ else
+ {
+ retval = mapped_character(__gg__high_value_character);
+ }
+ return retval;
}
- int figconst_character(cbl_figconst_t figconst)
+ cbl_char_t figconst_character(cbl_figconst_t figconst)
{
- int const_char = 0; // Head off a compiler warning
+ cbl_char_t const_char = 0; // Head off a compiler warning
switch(figconst)
{
case normal_value_e :
- const_char = -1;
+ abort();
break;
case low_value_e :
const_char = low_value_character();
return retval;
}
- int
- set_digit_negative(int digit, bool is_negative)
+ cbl_char_t
+ set_digit_negative(cbl_char_t digit, bool is_negative)
{
+ // Returns a 0-9 digit with the internal sign bit altered for ascii or
+ // ebcdic.
switch(m_numeric_sign_type)
{
case sign_type_ascii:
return m_numeric_sign_type == sign_type_ebcdic;
}
- };
+ bool
+ is_like_utf8() const
+ {
+ return m_is_like_utf8;
+ }
-charmap_t *__gg__get_charmap(cbl_encoding_t encoding);
+ void
+ memset(void *dest_, cbl_char_t ch, size_t bytelength)
+ {
+ uint8_t *dest = static_cast<uint8_t *>(dest_);
+ switch(m_stride)
+ {
+ case 1:
+ {
+ if( (ch & 0xFFFFFF00) == 0x00000000 )
+ {
+ // This is the normal case of filling a buffer with a single byte
+ ::memset(dest, ch & 0xff, bytelength);
+ }
+ else
+ {
+ // We are being asked to fill a byte-wide buffer with a multi-byte
+ // character.
+ uint8_t byte3 = ch >> 24;
+ uint8_t byte2 = ch >> 16;
+ uint8_t byte1 = ch >> 8;
+ uint8_t byte0 = ch;
+ size_t fill;
+ size_t i=0;
+ if( byte3 )
+ {
+ fill = bytelength / 4;
+ while( i<fill )
+ {
+ dest[i++] = byte0;
+ dest[i++] = byte1;
+ dest[i++] = byte2;
+ dest[i++] = byte3;
+ }
+ }
+ else if( byte2 )
+ {
+ fill = bytelength / 3;
+ while( i<fill )
+ {
+ dest[i++] = byte0;
+ dest[i++] = byte1;
+ dest[i++] = byte2;
+ }
+ }
+ else
+ {
+ fill = bytelength / 2;
+ while( i<fill )
+ {
+ dest[i++] = byte0;
+ dest[i++] = byte1;
+ }
+ }
+ while( i < bytelength )
+ {
+ dest[i++] = mapped_character(ascii_space);
+ }
+ }
+ break;
+ }
+
+ case 2:
+ {
+ assert( !(bytelength&1) );
+ // We know the target has an even number of bytes available. We also
+ // know that each codepoint is usually one, but sometimes two, pairs
+ // of bytes
+ uint16_t top_half = ch>>16;
+ uint16_t bottom_half = ch;
+ size_t fill = bytelength;
+ size_t i = 0;
+ uint16_t *p = PTRCAST(uint16_t, dest);
+ while( i<fill )
+ {
+ p[i/2] = bottom_half;
+ i += 2;
+ if( i>= fill )
+ {
+ break;
+ }
+ if( top_half )
+ {
+ p[i/2] = bottom_half;
+ i += 2;
+ }
+ }
+ if( i < bytelength )
+ {
+ // We were trying to put two-pair values into the destination, but
+ // there were an odd number of pairs available.
+ p[i] = mapped_character(ascii_space);
+ i += 2; // cppcheck-suppress unreadVariable
+ }
+ break;
+ }
+
+ case 4:
+ {
+ assert( !(bytelength&3) );
+ // We know the target has multiple of four bytes available.
+ uint32_t *p = PTRCAST(uint32_t, dest);
+ size_t i = 0;
+ while( i<bytelength )
+ {
+ p[i/4] = ch;
+ i += 4;
+ }
+ break;
+ }
+ }
+ }
+
+ void putch(cbl_char_t ch, void *base_, size_t location)
+ {
+ // This routine puts a character at a byte location. It's up to the
+ // user to provide the correct byte location, and update it by the stride
+ // when necessary.
+ uint8_t *base = static_cast<uint8_t *>(base_);
+ memcpy(base+location, &ch, m_stride);
+ if( m_stride < 4 )
+ {
+ location += m_stride;
+ ch >>= (8 * m_stride);
+ while(ch)
+ {
+ memcpy(base+location, &ch, m_stride);
+ location += m_stride;
+ ch >>= (8 * m_stride);
+ }
+ }
+ }
+
+ void putch(cbl_char_t ch, void *base_, size_t *location)
+ {
+ // This routine puts a character at a location, and updates the location
+ uint8_t *base = static_cast<uint8_t *>(base_);
+ memcpy(base+*location, &ch, m_stride);
+ *location += m_stride;
+ if( m_stride < 4 )
+ {
+ ch >>= 8 * m_stride;
+ while(ch)
+ {
+ memcpy(base+*location, &ch, m_stride);
+ *location += m_stride;
+ ch >>= 8 * m_stride;
+ }
+ }
+ }
+
+ cbl_char_t getch(const void *base_, size_t location) const
+ {
+ // This routine gets a character at a location, and updates the location
+ cbl_char_t retval = 0;
+ const uint8_t *base = static_cast<const uint8_t *>(base_);
+
+ memcpy(&retval, base+location, m_stride);
+//// location += m_stride;
+//// We need to do something about UTF-8 snd UTF-16
+//// while(ch)
+//// {
+//// memcpy(base+*location, &ch, m_stride);
+//// *location += m_stride;
+//// ch >>= 8 * m_stride;
+//// }
+ return retval;
+ }
+
+ cbl_char_t getch(const void *base_, size_t *location) const
+ {
+ // This routine gets a character at a location, and updates the location
+ cbl_char_t retval = 0;
+ const uint8_t *base = static_cast<const uint8_t *>(base_);
+
+ memcpy(&retval, base+*location, m_stride);
+ *location += m_stride;
+//// We need to do something about UTF-8 snd UTF-16
+//// while(ch)
+//// {
+//// memcpy(base+*location, &ch, m_stride);
+//// *location += m_stride;
+//// ch >>= 8 * m_stride;
+//// }
+ return retval;
+ }
+
+ unsigned long long strtoull(char *in, char **end, int /*base*/)
+ {
+ // This is like strtoull(3), but the base is restricted to 10.
+ size_t index = 0;
+ unsigned long long retval = 0;
+ cbl_char_t mapped_0 = mapped_character(ascii_0);
+ cbl_char_t mapped_9 = mapped_character(ascii_9);
+ for(;;)
+ {
+ cbl_char_t ch = getch(in, &index);
+ if( ch < mapped_0 || ch > mapped_9 )
+ {
+ break;
+ }
+ retval *= 10;
+ retval += ch & 0x0F;
+ }
+ *end = in + index-m_stride ;
+ return retval;
+ }
+
+ template <typename T>
+ size_t
+ Strlen( T *input, ssize_t limit = SSIZE_MAX ) {
+ size_t i;
+ for( i = 0; i < (limit / sizeof(T)) && input[i] != 0; i++ )
+ ;
+ return i;
+ }
+ size_t strlen2( const void *converted, ssize_t limit = SSIZE_MAX ) {
+ switch(m_stride) {
+ case 1:
+ return Strlen( reinterpret_cast<const char*>(converted), limit );
+ case 2:
+ return Strlen( reinterpret_cast<const uint16_t*>(converted), limit );
+ case 4:
+ return Strlen( reinterpret_cast<const uint16_t*>(converted), limit );
+ }
+ //// gcc_unreachable();
+ return -1; // Mollify cppcheck.
+ }
+
+ size_t
+ strlen( const void *converted,
+ ssize_t limit = SSIZE_MAX)
+ {
+ size_t retval;
+
+ union
+ {
+ const uint8_t *p8 ;
+ const uint16_t *p16;
+ const uint32_t *p32;
+ } ;
+ const uint8_t *p_start = reinterpret_cast<const uint8_t *>(converted);
+ p8 = p_start;
+ switch(m_stride)
+ {
+ case 1:
+ {
+ // Loop until the pointer is past the limit, or until we hit
+ // a character that is all zeroes
+ while(*p8)
+ {
+ if( p8 - p_start > limit )
+ {
+ break;
+ }
+ p8 += 1;
+ }
+ break;
+ }
+ case 2:
+ {
+ // Loop until the pointer is past the limit, or until we hit
+ // a character that is all zeroes
+ while(*p16)
+ {
+ if( p8 - p_start > limit )
+ {
+ break;
+ }
+ p8 += 2;
+ }
+ break;
+ }
+ case 4:
+ {
+ // Loop until the pointer is past the limit, or until we hit
+ // a character that is all zeroes
+ while(*p32)
+ {
+ if( p8 - p_start > limit )
+ {
+ break;
+ }
+ p8 += 4;
+ }
+ break;
+ }
+ }
+ retval = p8 - p_start;
+ return retval;
+ }
+ };
#endif
/* COBOL has the concept of Numeric Display values, which use an entire byte
per digit. IBM also calls this "Zoned Decimal".
-
+
In ASCII, the digits are '0' through '9' (0x30 through 0x39'. Signed
values are indicated by turning on the 0x40 bit in either the first
byte (for LEADING variables) or the last byte (for TRAILING).
* (But maybe the fill character should just be an explicit character.)
*/
enum cbl_field_attr_t : uint64_t {
- none_e = 0x0000000000,
- figconst_1_e = 0x0000000001, // This needs to be 1 - don't change the position
- figconst_2_e = 0x0000000002, // This needs to be 2
- figconst_4_e = 0x0000000004, // This needs to be 4
- rjust_e = 0x0000000008, // justify right
- ljust_e = 0x0000000010, // justify left
- zeros_e = 0x0000000020, // zero fill
- signable_e = 0x0000000040,
- constant_e = 0x0000000080, // pre-assigned constant
- function_e = 0x0000000100,
- quoted_e = 0x0000000200,
- filler_e = 0x0000000400,
- register_e = 0x0000000800, // Data definition is found in constants.cc
- intermediate_e = 0x0000001000, // Compiler-defined temporary variable
- embiggened_e = 0x0000002000, // redefined numeric made 64-bit by USAGE POINTER
- all_alpha_e = 0x0000004000, // FldAlphanumeric, but all A's
- all_x_e = 0x0000008000, // picture is all X's
- all_ax_e = 0x000000a000, // picture is all A's or all X's
- prog_ptr_e = 0x0000010000, // FUNCTION-POINTER or PROGRAM-POINTER
- scaled_e = 0x0000020000,
- refmod_e = 0x0000040000, // Runtime; indicates a refmod is active
- based_e = 0x0000080000, // pointer capacity, for ADDRESS OF or ALLOCATE
- any_length_e = 0x0000100000, // inferred length of linkage in nested program
- global_e = 0x0000200000, // field has global scope
- external_e = 0x0000400000, // field has external scope
- blank_zero_e = 0x0000800000, // BLANK WHEN ZERO
+ none_e = 0x0000000000,
+ figconst_1_e = 0x0000000001, // This needs to be 1 - don't change the position
+ figconst_2_e = 0x0000000002, // This needs to be 2
+ figconst_4_e = 0x0000000004, // This needs to be 4
+ rjust_e = 0x0000000008, // justify right
+ ljust_e = 0x0000000010, // justify left
+ zeros_e = 0x0000000020, // zero fill
+ signable_e = 0x0000000040,
+ constant_e = 0x0000000080, // pre-assigned constant
+ function_e = 0x0000000100,
+ quoted_e = 0x0000000200,
+ filler_e = 0x0000000400,
+ register_e = 0x0000000800, // Data definition is found in constants.cc
+ intermediate_e = 0x0000001000, // Compiler-defined temporary variable
+ embiggened_e = 0x0000002000, // redefined numeric made 64-bit by USAGE POINTER
+ all_alpha_e = 0x0000004000, // FldAlphanumeric, but all A's
+ all_x_e = 0x0000008000, // picture is all X's
+ all_ax_e = 0x000000a000, // picture is all A's or all X's
+ prog_ptr_e = 0x0000010000, // FUNCTION-POINTER or PROGRAM-POINTER
+ scaled_e = 0x0000020000,
+ refmod_e = 0x0000040000, // Runtime; indicates a refmod is active
+ based_e = 0x0000080000, // pointer capacity, for ADDRESS OF or ALLOCATE
+ any_length_e = 0x0000100000, // inferred length of linkage in nested program
+ global_e = 0x0000200000, // field has global scope
+ external_e = 0x0000400000, // field has external scope
+ blank_zero_e = 0x0000800000, // BLANK WHEN ZERO
// data division uses 2 low bits of high byte
- linkage_e = 0x0001000000, // field is in linkage section
- local_e = 0x0002000000, // field is in local section
- leading_e = 0x0004000000, // leading sign (signable_e alone means trailing)
- separate_e = 0x0008000000, // separate sign
- envar_e = 0x0010000000, // names an environment variable
- encoded_e = 0x0020000000, // data.initial matches codeset.encoding
- bool_encoded_e = 0x0040000000, // data.initial is a boolean string
- hex_encoded_e = 0x0080000000, // data.initial is a hex-encoded string
- depends_on_e = 0x0100000000, // A group hierachy contains a DEPENDING_ON
- initialized_e = 0x0200000000, // Don't call parser_initialize from parser_symbol_add
- has_value_e = 0x0400000000, // Flag to hierarchical descendents to ignore .initial
- ieeedec_e = 0x0800000000, // Indicates a FldFloat is IEEE 754 decimal, rather than binary
- big_endian_e = 0x1000000000, // Indicates a value is big-endian
- same_as_e = 0x2000000000, // Field produced by SAME AS (cannot take new members)
- record_key_e = 0x4000000000,
- typedef_e = 0x8000000000, // IS TYPEDEF
+ linkage_e = 0x0001000000, // field is in linkage section
+ local_e = 0x0002000000, // field is in local section
+ leading_e = 0x0004000000, // leading sign (signable_e alone means trailing)
+ separate_e = 0x0008000000, // separate sign
+ envar_e = 0x0010000000, // names an environment variable
+ encoded_e = 0x0020000000, // data.initial matches codeset.encoding
+ bool_encoded_e = 0x0040000000, // data.initial is a boolean string
+ hex_encoded_e = 0x0080000000, // data.initial is a hex-encoded string
+ depends_on_e = 0x0100000000, // A group hierachy contains a DEPENDING_ON
+ initialized_e = 0x0200000000, // Don't call parser_initialize from parser_symbol_add
+ has_value_e = 0x0400000000, // Flag to hierarchical descendents to ignore .initial
+ ieeedec_e = 0x0800000000, // Indicates a FldFloat is IEEE 754 decimal, rather than binary
+ big_endian_e = 0x1000000000, // Indicates a value is big-endian
+ same_as_e = 0x2000000000, // Field produced by SAME AS (cannot take new members)
+ record_key_e = 0x4000000000,
+ typedef_e = 0x8000000000, // IS TYPEDEF
strongdef_e = typedef_e + intermediate_e, // STRONG TYPEDEF (not temporary)
};
// The separate_e value does double-duty for FldPacked/COMP-6, which is not
// that there is no sign nybble.
#define packed_no_sign_e separate_e
-enum cbl_figconst_t
+#define LOW_VALUE_E figconst_1_e
+#define ZERO_VALUE_E (figconst_2_e|figconst_1_e)
+#define SPACE_VALUE_E figconst_4_e
+#define QUOTE_VALUE_E (figconst_4_e|figconst_1_e)
+#define HIGH_VALUE_E (figconst_4_e|figconst_2_e)
+
+enum cbl_figconst_t : uint64_t
{
normal_value_e = 0, // This one must be zero
low_value_e = 1, // The order is important, because
module_toplevel_e,
};
+enum convert_type_t {
+ convert_alpha_e = 0x01,
+ convert_nat_e = 0x02,
+ convert_any_e = 0x03, // i.e., both
+ convert_byte_e = 0x04,
+ convert_hex_e = 0x08, // may be combined with alpha or national
+ convert_just_bit_e = 0x10,
+ convert_just_e = 0x18, // combined with HEX
+ convert_rjust_bit_e = 0x20,
+ convert_rjust_e = 0x38, // combined with JUSTIFY
+};
+
/*
* Compare a "raised" EC to an enabled EC or of a declarative. "raised" may in
* fact not be raised; in the compiler this function is used to compare a TURN
size_t section; // implies program
bool global;
ec_type_t type;
- uint32_t nfile, files[files_max];
+ size_t nfile;
+ uint64_t files[files_max];
cbl_file_mode_t mode;
explicit cbl_declarative_t( cbl_file_mode_t mode = file_mode_none_e )
std::vector<uint64_t> encode() const;
/*
+ * Sort file names before file modes, and file modes before non-IO.
* Sort file names before file modes, and file modes before non-IO.
*/
bool operator<( const cbl_declarative_t& that ) const {
// TRUE if there are no files to match, or the provided file is in the list.
bool match_file( size_t file ) const {
- static const uint32_t * pend = files + nfile;
+ static const uint64_t * pend = files + nfile;
return nfile == 0 || pend != std::find(files, files + nfile, file);
}
struct encodings_t {
bool supported;
cbl_encoding_t type;
- const char name[32];
+ char name[32];
};
#endif
int errnum; // most recent errno; can't reuse "errno" as the name
file_status_t io_status; // See 2014 standard, section 9.1.12
int padding; // Actually a char
- int delimiter; // ends a record; defaults to '\n'.
+ uint32_t delimiter; // ends a record; defaults to '\n'.
+ int stride; // Width of a character
int flags; // cblc_file_flags_t
- int recent_char; // This is the most recent char sent to the file
+ uint32_t recent_char; // This is the most recent char sent to the file
int recent_key;
cblc_file_prior_op_t prior_op; // run-time type is INT
cbl_encoding_t encoding; // We assume size int
}
static
-char *
-get_filename( const cblc_file_t *file)
+void
+establish_filename( cblc_file_t *file,
+ const cblc_field_t *field_of_name,
+ char *filename)
{
- bool is_quoted = !!(file->flags & file_name_quoted_e);
+ // This routine sets file->filename to the provided name. The name might
+ // ultimately be filename, which if present has to be in the system encoding
+ // and is flagged as not-quoted, meaning that it could have been from an
+ // environment variable. It could be from a FldLiteral, which for a SELECT
+ // clause is not encoded in field_of_name->encoding. Or it could be from a
+ // variable, in which case it is encoded.
- static size_t fname_size = MINIMUM_ALLOCATION_SIZE;
- static char *fname = static_cast<char *>(malloc(MINIMUM_ALLOCATION_SIZE));
- massert(fname);
- if( strlen(file->filename)+1 > fname_size)
- {
- fname_size = strlen(file->filename)+1 ;
- fname = static_cast<char *>(realloc(fname, fname_size));
- }
+ // Whenever anybody establishes file->filename, it should be done through
+ // a malloc.
- strcpy(fname, file->filename);
+ // The field_of_name was not encoded in any way. The field_of_name->codeset
+ // might the same as ordinary alphanumerics, but the field->data for literals
+ // was not encoded.
- if( !is_quoted )
+ char *allocated_here = nullptr;
+ if( !filename )
{
- // We have been given something that might be the name of an
- // environment variable that contains the filename:
- const char *p_from_environment = getenv(fname);
- if( p_from_environment )
+ // We weren't given a filename, so we extract it from the field_of_name
+
+ file->flags |= file_name_quoted_e;
+ if( field_of_name->type == FldLiteralA )
{
- if( strlen(p_from_environment)+1 > fname_size )
+ // For literals in SELECT clauses, the initial value is the
+ // nul-terminated filename in the source-code encoding. The
+ // field->encoding is possibly wrong, but irrelevant.
+ allocated_here = static_cast<char *>(malloc(field_of_name->capacity+1));
+ massert(allocated_here);
+ memcpy(allocated_here, field_of_name->data, field_of_name->capacity);
+ allocated_here[field_of_name->capacity] = '\0';
+ filename = allocated_here;
+ }
+ else
+ {
+ // We need to convert from the designated encoding to the system
+ // encoding:
+ filename = __gg__iconverter(field_of_name->encoding,
+ __gg__console_encoding,
+ reinterpret_cast<char *>(field_of_name->data),
+ field_of_name->capacity);
+ // COBOL strings are space-filled to the right, so we have to get rid
+ // of any spaces out there. If somebody *wants* a filename space-filled
+ // to the right, well, at this juncture I am not prepared to be complicit
+ // in that particular flavor of lunacy.
+ size_t n = strlen(filename)-1;
+ // Note the conditional that terminates the loop when n goes from zero
+ // to a huge positive number in the event that the string is all SPACES
+ while( n < strlen(filename) && filename[n] == ascii_space )
{
- fname_size = strlen(p_from_environment)+1;
- free(fname);
- fname = static_cast<char *>(malloc(fname_size));
- massert(fname);
+ filename[n--] = '\0';
}
- strcpy(fname, p_from_environment);
}
}
+ else
+ {
+ file->flags &= ~file_name_quoted_e;
+ }
+
+ // At this point, we have a trimmed filename in the system encoding:
- if(*fname)
+ if( !(file->flags & file_name_quoted_e) )
{
- // COBOL strings are space-filled to the right, so we have to get rid
- // of any spaces out there. If somebody *wants* a filename space-filled
- // to the right, well, at this juncture I am not prepared to be complicit
- // in that particular flavor of lunacy.
- size_t n = strlen(fname)-1;
- // Note the conditional that terminates the loop when n goes from zero
- // to a huge positive number in the event that the string is all SPACES
- while( n < strlen(fname) && fname[n] == ascii_space )
+ // We have been given something that might be the name of an
+ // environment variable that contains the filename:
+ char *p_from_environment = getenv(filename);
+ if( p_from_environment )
{
- fname[n--] = '\0';
+ filename = p_from_environment;
}
}
- return fname;
+ free(file->filename);
+ file->filename = strdup(filename);
+ free(allocated_here);
}
static void
__gg__file_init(
cblc_file_t *file,
const char *name,
- size_t symbol_table_index,
+ uint64_t symbol_table_index,
cblc_field_t **keys,
int *key_numbers,
int *uniques,
file->errnum = 0 ;
file->io_status = FsSuccess ;
file->delimiter = charmap->mapped_character(ascii_newline) ;
+ file->stride = charmap->stride();
file->flags = file_flag_none_e;
file->flags |= (optional ? file_flag_optional_e : file_flag_none_e)
+ file_flag_initialized_e;
file->errnum = 0;
file->io_status = FsErrno;
- char record_marker;
+ cbl_char_t record_marker;
unsigned char *stash = static_cast<unsigned char *>(malloc(file->default_record->capacity));
massert(stash);
errno = 0;
file->errnum = 0;
+ record_marker = 0;
ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
if( presult < 0 )
{
}
static void
-__io__file_remove(cblc_file_t *file, char *filename, int is_quoted)
+trim_in_place(char *psz)
{
- // filename is the result of a strdup or malloc. Because both FILE OPEN
- // and FILE DELETE can establish or change a name, we free it here and
- // replace it. The same is true in FILE DELETE Format 2
- free(file->filename);
- file->filename = filename;
- file->flags &= ~file_name_quoted_e;
- file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
- int erc;
-
- // This code copied from reopen
- const char *trimmed_name = get_filename(file);
- if( !trimmed_name[0] )
+ // Get rid of leading spaces:
+ if( *psz == ascii_space )
{
- bool all_spaces = true;
- for(size_t i=0; i<strlen(file->filename); i++)
+ char *p = psz;
+ while(*p++ == ascii_space)
{
- if( file->filename[i] != ascii_space )
- {
- all_spaces = false;
- }
- break;
+ // Just trim them away:
}
- if( all_spaces )
+ p -= 1;
+ size_t i=0;
+ while(*p)
{
- warnx("Warning: %s specified with a filename that is all spaces",
- file->name);
- file->io_status = FsNameError; // "31"
- goto done;
+ psz[i++] = *p++;
}
+ psz[i++] = '\0';
+ }
+ // Get rid of trailing spaces:
+ size_t len = strlen(psz);
+ size_t i = len-1;
+ while( i < len && psz[i] == ascii_space )
+ {
+ psz[i--] = '\0';
+ }
+ }
- warnx( "%s(): There is no environment variable named \"%s\"\n",
- __func__,
- file->filename);
- file->io_status = FsNoFile; // "35"
+static void
+__io__file_remove(cblc_file_t *file,
+ const char *filename)
+ {
+ file->errnum = 0;
+ file->io_status = FsErrno;
+ int erc;
+
+ if( filename )
+ {
+ free(file->filename);
+ file->filename = strdup(filename);
+ trim_in_place(file->filename);
+ }
+
+ if( !strlen(file->filename) )
+ {
+ warnx("Warning: %s specified with a filename that is empty",
+ file->name);
+ file->io_status = FsNameError; // "31"
goto done;
}
- // trimmed_name is now the file system name of the file to be removed.
// If the file is open, we flag that with "41"
if( file->file_pointer )
// There's been a lot of buildup. We can now try to remove the file:
errno = 0;
- erc = remove(trimmed_name);
+ erc = remove(file->filename);
if( erc == 0 )
{
// All is copacetic. There was a file, and now it's gone.
}
else if( errno == ENOENT )
{
- // The file didn't exist.
+ // The file didn't exist.
file->io_status = FsUnavail; // "05"
}
else
{
// We have some other kind of error. Lack of credentials, or whatever.
- file->io_status = FsErrno; //
+ file->io_status = FsErrno; //
goto done;
}
while( rfp.record_position >= 0
&& rfp.record_position+total_record_length <= rfp.file_size )
{
- char record_marker;
+ cbl_char_t record_marker;
+ record_marker = 0;
ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
if( presult < 0 )
{
{
// This is like a write, except the place we are putting
// it has to be occupied instead of empty.
- char record_marker;
+ cbl_char_t record_marker;
if( relative_file_parameters_get( rfp,
rfm_microfocus_e,
file,
goto done;
}
+ record_marker = 0;
ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
if( presult < 0 )
{
}
}
// Let's check to make sure the slot for this record is currently available:
- char record_marker;
+ cbl_char_t record_marker;
+ record_marker = 0;
ssize_t presult = pread(rfp.fd, &record_marker, 1, rfp.flag_position);
if( presult < 0 )
{
{
// This code handles SEQUENTIAL and LINE SEQUENTIAl
charmap_t *charmap = __gg__get_charmap(file->encoding);
+ int stride = charmap->stride();
- char ch = '\0';
- size_t characters_to_write;
+ // ch is the vertical control character
+ cbl_char_t ch = '\0';
+ size_t bytes_to_write;
int lcount;
}
// By default, we write out the number of characters in the record area
- characters_to_write = length;
+ bytes_to_write = length;
// That gets overridden if there is a record_length
if( file->record_length )
{
int rdigits;
- characters_to_write = (int)__gg__binary_value_from_field(
+ bytes_to_write = stride * (int)__gg__binary_value_from_field(
&rdigits,
file->record_length);
}
if( file->org == file_line_sequential_e )
{
// If file-sequential, then trailing spaces are removed:
- while( characters_to_write > 0
- && location[characters_to_write-1] == charmap->mapped_character(ascii_space) )
+ while(bytes_to_write > 0
+ && charmap->getch(location, bytes_to_write-stride)
+ == charmap->mapped_character(ascii_space) )
{
- characters_to_write -= 1;
+ bytes_to_write -= stride;
}
}
- if( after && file->org == file_line_sequential_e && ch == charmap->mapped_character(ascii_newline) )
+ if( after && file->org == file_line_sequential_e
+ && ch == charmap->mapped_character(ascii_newline) )
{
// In general, we terminate every line with a newline. Because this
// line is supposed to start with a newline, we decrement the line
// counter by one if we had already sent one.
- if( lcount && ( file->recent_char == charmap->mapped_character(ascii_newline)
- || file->recent_char == charmap->mapped_character(ascii_ff)) )
+ if( lcount &&
+ ( file->recent_char == charmap->mapped_character(ascii_newline)
+ || file->recent_char == charmap->mapped_character(ascii_ff)) )
{
lcount -= 1;
}
{
while(lcount--)
{
- fputc(ch, file->file_pointer);
+ fwrite( &ch,
+ stride,
+ 1,
+ file->file_pointer);
if( handle_ferror(file, __func__, "fputc() error [3]") )
{
goto done;
switch(file->org)
{
case file_line_sequential_e:
- if( characters_to_write )
+ if( bytes_to_write )
{
fwrite( location,
- characters_to_write,
+ bytes_to_write,
1,
file->file_pointer);
if( handle_ferror(file, __func__, "fwrite() error") )
break;
case file_sequential_e:
- if( characters_to_write )
+ if( bytes_to_write )
{
// File sequential records can start off with a four-byte
// preamble.
- if( characters_to_write < file->record_area_min
- || characters_to_write > file->record_area_max)
+ if( bytes_to_write < file->record_area_min
+ || bytes_to_write > file->record_area_max)
{
file->io_status = FsBoundWrite; // "44"
goto done;
if( file->record_area_min != file->record_area_max )
{
// Because of the min/max mismatch, we require a preamble:
- // The first two bytes are the big-endian character count
+ // The first two bytes are the big-endian byte count
const unsigned char preamble[4] =
{
- (unsigned char)(characters_to_write>>8),
- (unsigned char)(characters_to_write),
+ (unsigned char)(bytes_to_write>>8),
+ (unsigned char)(bytes_to_write),
0,
0
};
}
fwrite( location,
- characters_to_write,
+ bytes_to_write,
1,
file->file_pointer);
if( handle_ferror(file, __func__, "fwrite() error") )
{
// Special case: when AFTER NON-ZERO lines, we stick a newline on the
// end of this record:
- fputc(ch, file->file_pointer);
+ fwrite( &ch,
+ stride,
+ 1,
+ file->file_pointer);
if( handle_ferror(file, __func__, "fputc() error [4]") )
{
goto done;
// We did the output BEFORE, so now it's time to send some newlines
while(lcount--)
{
- fputc(ch, file->file_pointer);
+ fwrite( &ch,
+ stride,
+ 1,
+ file->file_pointer);
if( handle_ferror(file, __func__, "fputc() error [5]") )
{
goto done;
{
file->errnum = 0;
file->io_status = FsErrno;
- size_t characters_read = 0;
- size_t remaining;
+ size_t bytes_read = 0;
bool hit_eof;
// According to IBM:
// characters to the right as undefined. I'm going with IBM,
// it makes more sense to me.
+ charmap_t *charmap = __gg__get_charmap(file->encoding);
+ int stride = charmap->stride();
+
// We first stage the data into the record area.
- int ch;
+ cbl_char_t ch;
long fpos = ftell(file->file_pointer);
if( handle_ferror(file, __func__, "ftell() error") )
}
hit_eof = false;
- while( characters_read < file->record_area_max )
+ while( bytes_read < file->record_area_max )
{
- ch = fgetc(file->file_pointer);
+ ch = 0;
+ fread(&ch, 1, stride, file->file_pointer);
file->errnum = ferror(file->file_pointer);
if( ch == file->delimiter )
{
break;
}
- if( ch == EOF )
+ if( feof(file->file_pointer) )
{
hit_eof = true;
clearerr(file->file_pointer);
fpos = -1;
goto done;
}
- file->default_record->data[characters_read] = (char)ch;
- characters_read += 1;
- }
- remaining = characters_read;
- while(remaining < file->record_area_max )
- {
- // Space fill shorty records
- charmap_t *charmap = __gg__get_charmap(file->encoding);
- file->default_record->data[remaining++] =
- charmap->mapped_character(ascii_space);
+ memcpy(file->default_record->data+bytes_read, &ch, stride);
+ bytes_read += stride;
}
+ // Space fill shorty records
+ charmap->memset(file->default_record->data+bytes_read,
+ charmap->mapped_character(ascii_space),
+ file->record_area_max - bytes_read);
- if( hit_eof && !characters_read)
+ if( hit_eof && !bytes_read)
{
// We got an end-of-file without characters
file->io_status = FsEofSeq; // "10"
// does another READ:
file->io_status = FsErrno;
}
- else if (characters_read < file->record_area_max)
+ else if (bytes_read < file->record_area_max )
{
// Just discard an early record delimiter
file->io_status = FsRecordLength; // "04"
// Manual: "If the first unread character is the record delimiter, it
// is discarded. Otherwise, the first unread character becomes the first
// character read by the next READ statement."
- ch = fgetc(file->file_pointer);
- file->errnum = ferror();
- // If that next character isn't a delimiter, put it back:
- if( ch != file->delimiter && ch != EOF)
- {
- ungetc(ch, file->file_pointer);
- }
- else if( handle_ferror(file->file_pointer, __func__, "fgetc() error") )
- {
- fpos = -1;
- goto done;
- }
#else
// In this code, extra characters before the newline
// are read next time are discarded. GnuCOBOL works this way, and
// SEQUENTIAL; it describes only SEQUENTIAL.
for(;;)
{
- ch = fgetc(file->file_pointer);
+ ch = 0;
+ fread(&ch, 1, stride, file->file_pointer);
file->errnum = ferror(file->file_pointer);
// We can't use handle_ferror() directly, because an EOF is
// a legitimate way to end the last line.
- if( ch == file->delimiter || ch == EOF)
+ if( ch == file->delimiter || feof(file->file_pointer) )
{
clearerr(file->file_pointer);
break;
if( file->record_length )
{
__gg__int128_to_field(file->record_length,
- characters_read,
+ bytes_read/stride,
0,
truncation_e,
NULL);
if( characters_read < bytes_in_record )
{
charmap_t *charmap = __gg__get_charmap(file->encoding);
- memset( file->default_record->data,
- charmap->mapped_character(ascii_space),
- bytes_to_read);
+ charmap->memset(file->default_record->data,
+ charmap->mapped_character(ascii_space),
+ bytes_to_read);
file->io_status = FsEofSeq; // "10"
fpos = -1;
goto done;
}
+ if( characters_read < file->default_record->capacity )
+ {
+ // The record area is longer than the characters we read. Space-fill out
+ // to the end:
+
+ charmap_t *charmap = __gg__get_charmap(file->encoding);
+ charmap->memset( file->default_record->data + characters_read,
+ charmap->mapped_character(ascii_space),
+ file->default_record->capacity - characters_read );
+ }
+
+
+
// Let the caller know if we got too few or too many characters
if( bytes_in_record < file->record_area_min
|| bytes_in_record > file->record_area_max )
if( file->record_length )
{
__gg__int128_to_field(file->record_length,
- characters_read,
+ characters_read/file->stride,
0,
truncation_e,
NULL);
file->prior_read_location = -1;
goto done;
}
- char record_marker;
+ cbl_char_t record_marker;
+ record_marker = 0;
if( pread(rfp.fd, &record_marker, 1, rfp.flag_position) <= 0)
{
goto done;
// Stash the mode_char for later analysis during READ and WRITE operations
file->mode_char = mode_char;
- char *trimmed_name;
- trimmed_name = get_filename(file);
- if( !trimmed_name[0] )
+ if( !file->filename[0] )
{
bool all_spaces = true;
for(size_t i=0; i<strlen(file->filename); i++)
// achMode is the mode string that gets passed down below to fopen().
random_access_mode = ( file->access == file_access_rnd_e
|| file->access == file_access_dyn_e);
- the_file_exists = access(trimmed_name, F_OK) == 0;
+ the_file_exists = access(file->filename, F_OK) == 0;
file->flags |= the_file_exists ? file_flag_existed_e : file_flag_none_e ;
// We have four operations: INPUT (r) OUTPUT (w) I-O (+) and EXTEND (a)
}
}
- file->file_pointer = fopen(trimmed_name, achMode);
+ file->file_pointer = fopen(file->filename, achMode);
if( file->file_pointer == NULL )
{
file->errnum = errno;
static void
__io__file_open(cblc_file_t *file,
- char *filename,
- int mode_char,
- int is_quoted)
+ const char *filename,
+ int mode_char)
{
- // 'filename' is a pointer to a malloc() buffer.
- // The 'filename' has to be in the system encoding, typically ASCII
-
- // The complication: A filename can be literal text, it can be from a COBOL
- // alphanumeric variable, or it can be the name of an environment variable
- // that contains the actual name of the file. The consequence is that if
- // you want to call __gg__file_open from anywhere except the parser_file_open
- // routine, then you had best really know what you are doing.
-
file->errnum = 0;
file->io_status = FsErrno;
+
+ if( filename )
+ {
+ free(file->filename);
+ file->filename = strdup(filename);
+ trim_in_place(file->filename);
+ }
+
+ if( !strlen(file->filename) )
+ {
+ warnx("Warning: %s specified with a filename that is empty",
+ file->name);
+ file->io_status = FsNameError; // "31"
+ goto done;
+ }
+
if( file->file_pointer )
{
// The file is already open:
}
else
{
- // filename is the result of a strdup or malloc. Because both FILE OPEN
- // and FILE DELETE can establish or change a name, we free it here and
- // replace it. The same is true in FILE DELETE Format 2
- free(file->filename);
- file->filename = filename;
- file->flags &= ~file_name_quoted_e;
- file->flags |= is_quoted ? file_name_quoted_e : file_flag_none_e;
-
__gg__file_reopen(file, mode_char);
}
+ done:
file->prior_op = file_op_open;
establish_status(file, -1);
}
file_indexed_close(file);
}
- // The filename was malloced. So, we get rid of it here.
- free(file->filename);
- file->filename = NULL;
-
done:
file->prior_op = file_op_close;
establish_status(file, fpos);
static const char constexpr marquee[64] = "libgcobol: gfileio.cc";
typedef void (open_t)( cblc_file_t *file,
- char *filename,
- int mode_char,
- int is_quoted );
+ const char *filename,
+ int mode_char);
typedef void (close_t)( cblc_file_t *file,
int how );
typedef void (start_t)( cblc_file_t *file,
int after,
int lines,
int is_random );
- typedef void (rewrite_t)( cblc_file_t *file,
- size_t length, bool is_random );
- typedef void (delete_t)( cblc_file_t *file,
+ typedef void (rewrite_t)(cblc_file_t *file,
+ size_t length, bool is_random );
+ typedef void (delete_t)(cblc_file_t *file,
bool is_random );
- typedef void (remove_t)( cblc_file_t *file,
- char *filename,
- int is_quoted);
-
+ typedef void (remove_t)(cblc_file_t *file,
+ const char *filename);
open_t *Open;
close_t *Close;
start_t *Start;
* Then, in libgcobol, replace direct calls with calls through fileops.
* That is, instead of
*
- * __gg__file_open("foo", "r", false );
+ * __gg__file_open("foo", "r" );
* use
- * gfile->Open("foo", "r", false );
+ * gfile->Open("foo", "r" );
*
* You'll probably want some kind of trampoline to avoid the need to
* generate the Gimple to call through a pointer to a structure:
extern "C"
void
__gg__file_open(cblc_file_t *file,
+ const cblc_field_t *field_of_name,
char *filename,
- int mode_char,
- int is_quoted)
+ int mode_char)
{
- // The 'filename' has to be in the system encoding, typically ASCII
+ establish_filename(file, field_of_name, filename);
gcobol_io_t *functions = gcobol_io_funcs();
- functions->Open(file, filename, mode_char, is_quoted);
+ functions->Open(file, NULL, mode_char);
}
extern "C"
extern "C"
void
-__gg__file_remove(cblc_file_t *file, char *name, int is_quoted)
+__gg__file_remove( cblc_file_t *file,
+ const cblc_field_t *field_of_name,
+ char *filename)
{
// DELETE FILE Format 2 - removes a file.
+ establish_filename(file, field_of_name, filename);
gcobol_io_t *functions = gcobol_io_funcs();
- functions->Remove(file, name, is_quoted);
+ functions->Remove(file, NULL);
}
/* end interface functions */
void __gg__handle_error(const char *function, const char *msg);
void __gg__file_open( cblc_file_t *file,
+ const cblc_field_t *fname,
char *filename,
- int mode_char,
- int is_quoted);
+ int mode_char);
void __gg__file_reopen(cblc_file_t *file, int mode_char);
const size_t *C_s = __gg__treeplet_3s;
bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
- // This is the assignment phase of an ADD Format 2
+ // This is the assignment phase of an SUBTRACT Format 2
// We take phase1_result and subtract it from C
GCOB_FP128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
compute_error
);
- // Subtract that from the B value:
+ // Subtract that subtotal from the B value:
GCOB_FP128 value_b = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
- // The two numbers have the same number of rdigits. It's now safe to add
- // them.
+
phase1_result_float = subtraction_helper_float(value_b, phase1_result_float, compute_error);
}
{
if( C[0]->type == FldFloat )
{
- // gixed * float
+ // fixed * float
a_value = (GCOB_FP128) multiply_intermediate_int128;
if( multiply_intermediate_rdigits )
{
#include <langinfo.h>
#include <cctype>
+#include <cwctype>
#include <cmath>
#include <cstring>
#include <ctime>
static bool
is_zulu_format(PCHAR left, PCHAR &right, charmap_t *charmap)
{
- int char_Z = charmap->mapped_character(ascii_Z);
+ cbl_char_t char_Z = charmap->mapped_character(ascii_Z);
+ cbl_char_t char_z = charmap->mapped_character(ascii_z);
+ int stride = charmap->stride();
bool retval = false;
- if( right > left )
+ if( right - left >= stride)
{
- retval = std::toupper((unsigned char)*(right-1)) == char_Z;
+ cbl_char_t last_character = charmap->getch(right-stride, size_t(0));
+ if( last_character == char_Z || last_character == char_z )
+ {
+ retval = true;
+ }
}
return retval;
}
void
string_to_dest(cblc_field_t *dest, const char *psz)
{
+ charmap_t *charmap = __gg__get_charmap(dest->encoding);
size_t dest_length = dest->capacity;
- size_t source_length = strlen(psz);
+ size_t source_length = charmap->strlen(psz);
size_t length = std::min(dest_length, source_length);
- charmap_t *charmap = __gg__get_charmap(dest->encoding);
- memset(dest->data, charmap->mapped_character(ascii_space), dest_length);
+ charmap->memset(dest->data, charmap->mapped_character(ascii_space), dest_length);
memcpy(dest->data, psz, length);
}
ctm.day_of_week+1,
ctm.day_of_year,
ctm.ZZZZ);
+
__gg__convert_encoding(PTRCAST(char, stime),
DEFAULT_SOURCE_ENCODING,
dest->encoding);
char const * const dest_end,
char const * source,
char const * const source_end,
- charmap_t * charmap_source,
+ charmap_t * charmap,
char const * const ftime)
{
// This routine is highly dependent on the source format being correct.
static const int OFFSET_TO_DOY = 34;
static const int OFFSET_TO_ZZZZ = 37;
- unsigned int decimal_point =
- charmap_source->mapped_character(__gg__get_decimal_point());
- unsigned int source_Y = charmap_source->mapped_character(ascii_Y );
- unsigned int source_W = charmap_source->mapped_character(ascii_W );
- unsigned int source_s = charmap_source->mapped_character(ascii_s );
- unsigned int source_m = charmap_source->mapped_character(ascii_m );
- unsigned int source_h = charmap_source->mapped_character(ascii_h );
- unsigned int source_plus = charmap_source->mapped_character(ascii_plus);
- unsigned int source_D = charmap_source->mapped_character(ascii_D );
- unsigned int source_M = charmap_source->mapped_character(ascii_M );
+ int stride = charmap->stride();
+
+ cbl_char_t decimal_point =
+ charmap->mapped_character(__gg__get_decimal_point());
+ cbl_char_t source_Y = charmap->mapped_character(ascii_Y );
+ cbl_char_t source_W = charmap->mapped_character(ascii_W );
+ cbl_char_t source_s = charmap->mapped_character(ascii_s );
+ cbl_char_t source_m = charmap->mapped_character(ascii_m );
+ cbl_char_t source_h = charmap->mapped_character(ascii_h );
+ cbl_char_t source_plus = charmap->mapped_character(ascii_plus);
+ cbl_char_t source_D = charmap->mapped_character(ascii_D );
+ cbl_char_t source_M = charmap->mapped_character(ascii_M );
while( source < source_end && dest < dest_end )
{
- unsigned char fchar = *source;
+ cbl_char_t fchar = charmap->getch(source, size_t(0));
if( fchar == source_Y )
{
// This can only be a YYYY
// But, we have a choice. If there is a 'W' in the format, then we
// need to use ZZZZ rather than YYYY:
- src = ftime + OFFSET_TO_YYYY;
+ src = ftime + OFFSET_TO_YYYY*stride;
const char *p = source;
+ size_t index = 0;
while(p < source_end)
{
- if( (unsigned char)*p++ == source_W )
+ //if( (unsigned char)*p++ == source_W )
+ if( charmap->getch(source, &index) == source_W )
{
- src = ftime + OFFSET_TO_ZZZZ;
+ src = ftime + OFFSET_TO_ZZZZ*stride;
}
+ p += stride;
}
ncount = 4;
{
// This can only be a MM
ncount = 2;
- src = ftime + OFFSET_TO_MM;
+ src = ftime + OFFSET_TO_MM*stride;
}
else if( fchar == source_D )
{
// It can be a D, DD or DDD
- if( (unsigned char)source[2] == source_D )
+ if( charmap->getch(source, 2*stride) == source_D )
{
ncount = 3;
- src = ftime + OFFSET_TO_DOY;
+ src = ftime + OFFSET_TO_DOY*stride;
}
- else if( (unsigned char)source[1] == source_D )
+ else if( charmap->getch(source, 1*stride) == source_D )
{
ncount = 2;
- src = ftime + OFFSET_TO_DD;
+ src = ftime + OFFSET_TO_DD*stride;
}
else
{
ncount = 1;
- src = ftime + OFFSET_TO_DOW;
+ src = ftime + OFFSET_TO_DOW*stride;
}
}
else if( fchar == source_plus )
{
saw_plus_sign = true;
ncount = 1;
- src = ftime + OFFSET_TO_OFFSET;
+ src = ftime + OFFSET_TO_OFFSET*stride;
}
else if( fchar == source_h )
{
ncount = 2;
if(saw_plus_sign)
{
- src = ftime + OFFSET_TO_OFFSET_HOUR;
+ src = ftime + OFFSET_TO_OFFSET_HOUR*stride;
}
else
{
- src = ftime + OFFSET_TO_HOUR;
+ src = ftime + OFFSET_TO_HOUR*stride;
}
}
else if( fchar == source_m )
ncount = 2;
if(saw_plus_sign)
{
- src = ftime + OFFSET_TO_OFFSET_MINUTE;
+ src = ftime + OFFSET_TO_OFFSET_MINUTE*stride;
}
else
{
- src = ftime + OFFSET_TO_MINUTE;
+ src = ftime + OFFSET_TO_MINUTE*stride;
}
}
else if( fchar == decimal_point )
{
// There can be a variable number of fractional 's'
ncount = -1;
- src = ftime + OFFSET_TO_FRACTION;
+ src = ftime + OFFSET_TO_FRACTION*stride;
}
else
{
ncount = 2;
- src = ftime + OFFSET_TO_SECOND;
+ src = ftime + OFFSET_TO_SECOND*stride;
}
}
else if( fchar == source_W )
{
ncount = 3;
- src = ftime + OFFSET_TO_WEEK;
+ src = ftime + OFFSET_TO_WEEK*stride;
}
else
{
{
// This indicates special processing for a variable number of 's'
// characters
- while((unsigned char)*source == source_s && dest < dest_end)
+ while(charmap->getch(source, size_t(0)) == source_s && dest < dest_end)
{
- source += 1;
- *dest++ = *src++;
+ source += stride;
+ memcpy(dest, src, stride);
+ dest += stride;
+ src += stride;
}
}
else
{
- source += ncount;
+ source += ncount*stride;
while(ncount-- && dest < dest_end)
{
- *dest++ = *src++;
+ memcpy(dest, src, stride);
+ dest += stride;
+ src += stride;
}
}
}
}
// We need to convert the ch character to the destination encoding.
- const char achFrom[2] = {static_cast<char>(ch), '\0'};
+ // THIS IS A KLUDGE UNTIL WE MAKE THE CURRENT_COLLATION TO BE A MAP OF
+ // WIDE CHARACTERS!
+ charmap_t *charmap_dest = __gg__get_charmap(dest->encoding);
+
+ cbl_char_t achFrom = 0;
+ memcpy(&achFrom, &ch, 1);
size_t charsout;
const char *converted = __gg__iconverter(__gg__display_encoding,
dest->encoding,
- achFrom,
+ &achFrom,
1,
&charsout );
// Pick up our character, because mapped_character() might clobber
// the converted contents.
- int converted_char = *converted; // cppcheck-suppress variableScope
+ int converted_char = 0;
+ memcpy(&converted_char, converted, charmap_dest->stride());
// Space fill the dest:
- charmap_t *charmap_dest = __gg__get_charmap(dest->encoding);
- memset(dest->data,
- charmap_dest->mapped_character(ascii_space),
- dest->capacity);
+ charmap_dest-> memset(dest->data,
+ charmap_dest->mapped_character(ascii_space),
+ dest->capacity);
// Make the first character of the destination equal to our converted
// character:
if( ch > -1 && charsout == 1 )
{
- dest->data[0] = converted_char;
+ charmap_dest->putch(converted_char, dest->data, size_t(0));
}
}
char retval[DATE_STRING_BUFFER_SIZE];
timespec_to_string(retval, tp);
- __gg__convert_encoding(PTRCAST(char, retval),
- DEFAULT_SOURCE_ENCODING,
- dest->encoding);
- string_to_dest(dest, retval);
+
+ size_t bytes_converted;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ dest->encoding,
+ retval,
+ strlen(retval),
+ &bytes_converted);
+ __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+ __gg__adjust_dest_size(dest, bytes_converted);
+ free(converted);
}
extern "C"
charmap_t *charmap_from = __gg__get_charmap(from);
charmap_t *charmap_to = __gg__get_charmap(to);
- int dest_space = charmap_to->mapped_character(ascii_space);
- int format_Z = charmap_from->mapped_character(ascii_Z);
+ cbl_char_t dest_space = charmap_to->mapped_character(ascii_space);
+ cbl_char_t format_Z = charmap_from->mapped_character(ascii_Z);
+ cbl_char_t format_z = charmap_from->mapped_character(ascii_z);
// Establish the destination, and set it to spaces
char *d = PTRCAST(char, dest->data);
const char *dend = d + dest->capacity;
- memset(d, dest_space, dest->capacity);
+ charmap_to->memset(d, dest_space, dest->capacity);
// Establish the formatting string:
const char *format = PTRCAST(char, (input->data+input_offset));
const char *format_end = format + input_size;
bool is_zulu = false;
-
const char *p = format;
while( p < format_end )
{
- int ch = *p++;
- if( ch == format_Z )
+ cbl_char_t ch = charmap_from->getch(p, size_t(0));
+ if( ch == format_Z || ch == format_z)
{
is_zulu = true;
break;
}
+ p += charmap_from->stride();
}
struct cbl_timespec ts = {};
// Convert seconds west of UTC to minutes east of UTC
ctm.tz_offset = -timezone/60;
- char achftime[64];
+ char achftime[256];
get_all_time(dest, achftime, ctm);
ftime_replace(d, dend, format, format_end, charmap_from, achftime);
+ return;
}
extern "C"
size_t arg1_offset,
size_t arg1_size,
const cblc_field_t *arg2, // integer date
- size_t arg2_offset,
+ size_t arg2_offset,
size_t arg2_size)
{
// FUNCTION FORMATTED-DATE
- cbl_encoding_t from = arg1->encoding;
cbl_encoding_t to = dest->encoding;
- charmap_t *charmap_from = __gg__get_charmap(from);
+ cbl_encoding_t from = arg1->encoding;
+
charmap_t *charmap_to = __gg__get_charmap(to);
+ charmap_t *charmap_from = __gg__get_charmap(from);
- int dest_space = charmap_to->mapped_character(ascii_space);
+ cbl_char_t dest_space = charmap_to->mapped_character(ascii_space);
// Establish the destination, and set it to spaces
char *d = PTRCAST(char, dest->data);
const char *dend = d + dest->capacity;
- memset(d, dest_space, dest->capacity);
+ charmap_to->memset(d, dest_space, dest->capacity);
// Establish the formatting string:
char *format = PTRCAST(char, (arg1->data+arg1_offset));
populate_ctm_from_date(ctm, arg2, arg2_offset, arg2_size);
- char achftime[64];
+ char achftime[256];
get_all_time(dest, achftime, ctm);
if( __gg__exception_code )
{
- memset(d, dest_space, dend-d);
+ charmap_to->memset(d, dest_space, dend-d);
}
else
{
convert_to_zulu(ctm);
}
- char achftime[64];
+ char achftime[256];
get_all_time(dest, achftime, ctm);
if( __gg__exception_code )
{
// Establish the destination, and set it to spaces
char *d = PTRCAST(char, dest->data);
const char *dend = d + dest->capacity;
- memset(d, dest_space, dest->capacity);
+ charmap_to->memset(d, dest_space, dest->capacity);
// Establish the formatting string:
char *format = PTRCAST(char, (par1->data+par1_o));
convert_to_zulu(ctm);
}
- char achftime[64];
+ char achftime[256];
get_all_time(dest, achftime, ctm);
if( __gg__exception_code )
{
- memset(d, dest_space, dend-d);
+ charmap_to->memset(d, dest_space, dend-d);
}
else
{
}
}
+static void
+change_case( cblc_field_t *dest,
+ const cblc_field_t *input,
+ size_t input_offset,
+ size_t input_size,
+ std::wint_t (changer)( std::wint_t ch )
+ )
+ {
+ cbl_encoding_t enc_to = dest->encoding;
+ cbl_encoding_t enc_from = input->encoding;
+ cbl_encoding_t enc_work = DEFAULT_32_ENCODING;
+
+ // In order to handle any input encoding, we convert to UTF32:
+ size_t converted_bytes;
+ const char *converted = __gg__iconverter(enc_from,
+ enc_work,
+ input->data+input_offset,
+ input_size,
+ &converted_bytes);
+ // Make a copy of it to prevent the static nature of iconverter from causing
+ // trouble:
+ cbl_char_t *duped =
+ static_cast<cbl_char_t *>(__gg__memdup(converted, converted_bytes));
+ cbl_char_t *pend = duped + converted_bytes / width_of_utf32;
+
+ // Use the designated case changer:
+ std::transform(duped, pend, duped,
+ [&changer](cbl_char_t c) { return changer(c); });
+
+ // Convert that modified string to the destination encoding:
+ converted = __gg__iconverter(enc_work,
+ enc_to,
+ duped,
+ converted_bytes,
+ &converted_bytes);
+ free(duped);
+
+ char *duped2 = static_cast<char *>(__gg__memdup(converted, converted_bytes));
+ __gg__field_from_string(dest,
+ 0,
+ dest->capacity,
+ duped2,
+ converted_bytes);
+ free(duped2);
+ __gg__adjust_dest_size(dest, converted_bytes);
+ }
+
+
extern "C"
void
__gg__lower_case( cblc_field_t *dest,
size_t input_offset,
size_t input_size)
{
- cbl_encoding_t from = input->encoding;
- cbl_encoding_t to = dest->encoding;
- charmap_t *charmap_dest = __gg__get_charmap(to);
+ return change_case(dest, input, input_offset, input_size, std::towlower);
+ }
- size_t dest_length = dest->capacity;
- size_t source_length = input_size;
- size_t length = std::min(dest_length, source_length);
- memset( dest->data,
- charmap_dest->mapped_character(ascii_space),
- dest_length);
- memcpy(dest->data, input->data+input_offset, length);
- __gg__convert_encoding_length(PTRCAST(char, dest->data),
- length,
- from,
- DEFAULT_SOURCE_ENCODING);
- std::transform(dest->data, dest->data + dest_length, dest->data,
- [](unsigned char c) { return std::tolower(c); });
- __gg__convert_encoding_length(PTRCAST(char, dest->data),
- length,
- DEFAULT_SOURCE_ENCODING,
- to);
+extern "C"
+void
+__gg__upper_case( cblc_field_t *dest,
+ const cblc_field_t *input,
+ size_t input_offset,
+ size_t input_size)
+ {
+ return change_case(dest, input, input_offset, input_size, std::towupper);
}
extern "C"
PTRCAST(char, input->data + input_offset),
input_size,
&nbytes);
- const char *pend = p + input_size;
+ const char *pend = p + nbytes;
int errpos = 0;
__int128 retval = 0;
&nbytes);
char *pstart = strdup(converted);
massert(pstart);
- char *pend = pstart + src_size;
+ char *pend = pstart + nbytes;
char *p = pstart;
GCOB_FP128 retval = 0;
cbl_encoding_t from = arg1->encoding;
cbl_encoding_t to = dest->encoding;
charmap_t *charmap = __gg__get_charmap(to);
- int mapped_space = charmap->mapped_character(ascii_space);
+ int stride = charmap->stride();
+ cbl_char_t mapped_space = charmap->mapped_character(ascii_space);
int rdigits;
__int128 type = __gg__binary_value_from_qualified_field(&rdigits,
"be an intermediate alphanumeric\n");
abort();
}
+
+ // What is this all about?
dest->capacity = dest->offset;
// Make a copy of the input:
// Convert it to the destination encoding
__gg__convert_encoding_length(copy, arg1_size, from, to);
-
// No matter what, we want to find the leftmost non-space and the
// rightmost non-space:
char *left = copy;
- char *right = left + arg1_size-1;
+ char *right = left + arg1_size-stride;
// Find left and right: the first and last non-spaces
while( left <= right )
{
- if( *left != mapped_space && *right != mapped_space )
+ cbl_char_t cleft = charmap->getch(left, (size_t)0);
+ cbl_char_t cright = charmap->getch(right, (size_t)0);
+
+ if( cleft != mapped_space && cright != mapped_space )
{
break;
}
- if( *left == mapped_space )
+ if( cleft == mapped_space )
{
- left += 1;
+ left += stride;
}
- if( *right == mapped_space )
+ if( cright == mapped_space )
{
- right -= 1;
+ right -= stride;
}
}
if( type == LEADING )
// When the arg1 input string was empty, we want left to be right+1.
// The left/right loop can sometimes end up with left equal to right+2.
// That needs to be fixed:
- left = right+1;
+ left = right+stride;
}
- size_t ncount = right+1 - left;
+ size_t ncount = right+stride - left;
__gg__adjust_dest_size(dest, ncount);
- char *dest_left = PTRCAST(char, dest->data);
- char *dest_right = dest_left + dest->capacity - 1;
- const char *dest_end = dest_left + dest->capacity;
-
- while( dest_left <= dest_right && left <= right )
- {
- *dest_left++ = *left++;
- }
- while(dest_left < dest_end)
- {
- *dest_left++ = mapped_space;
- }
+ memmove(dest->data, left, ncount);
}
#if HAVE_INITSTATE_R && HAVE_SRANDOM_R && HAVE_RANDOM_R
cbl_encoding_t from = input->encoding;
cbl_encoding_t to = dest->encoding;
- size_t dest_length = dest->capacity;
- size_t source_length = input_size;
- size_t length = std::min(dest_length, source_length);
+ charmap_t *charmap = __gg__get_charmap(to);
+ size_t stride = charmap->stride();
- // Make a copy of the input
- char *copy = static_cast<char *>(malloc(length));
- massert(copy);
- memcpy(copy, input->data+input_offset, length);
+ size_t dest_length = dest->capacity;
// Convert the input to the destination encoding
- __gg__convert_encoding_length(copy,
- length,
- from,
- to);
-
- // Set the destination to all spaces
- charmap_t *charmap = __gg__get_charmap(to);
- memset(dest->data, charmap->mapped_character(ascii_space), dest_length);
- for(size_t i=0; i<length; i++)
- {
- dest->data[i] = copy[source_length-1-i];
- }
- if( (dest->attr & intermediate_e) )
- {
- dest->capacity = std::min(dest_length, source_length);
- }
-
- free(copy);
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter(from,
+ to,
+ input->data+input_offset,
+ input_size,
+ &bytes_converted);
+ // copy over characters from the end of the copy to the beginning of dest:
+ size_t i_from = bytes_converted - stride;
+ size_t i_to = 0;
+ while( i_from < bytes_converted && i_to < dest_length )
+ {
+ cbl_char_t ch = charmap->getch(converted, i_from);
+ charmap->putch(ch, dest->data+dest->offset, i_to);
+ i_from -= stride;
+ i_to += stride;
+ }
+ __gg__adjust_dest_size(dest, i_to);
}
extern "C"
NULL);
}
-extern "C"
-void
-__gg__upper_case( cblc_field_t *dest,
- const cblc_field_t *input,
- size_t input_offset,
- size_t input_size)
- {
- cbl_encoding_t from = input->encoding;
- cbl_encoding_t to = dest->encoding;
- charmap_t *charmap_dest = __gg__get_charmap(to);
-
- size_t dest_length = dest->capacity;
- size_t source_length = input_size;
- size_t length = std::min(dest_length, source_length);
- memset( dest->data,
- charmap_dest->mapped_character(ascii_space),
- dest_length);
- memcpy(dest->data, input->data+input_offset, length);
- __gg__convert_encoding_length(PTRCAST(char, dest->data),
- length,
- from,
- DEFAULT_SOURCE_ENCODING);
- std::transform(dest->data, dest->data + dest_length, dest->data,
- [](unsigned char c) { return std::toupper(c); });
- __gg__convert_encoding_length(PTRCAST(char, dest->data),
- length,
- DEFAULT_SOURCE_ENCODING,
- to);
- }
-
extern "C"
void
__gg__variance( cblc_field_t *dest,
size_t par2_offset,
size_t par2_size)
{
- // Establish the formatting string:
- char *format = PTRCAST(char, (par1->data+par1_offset));
- char *format_end = format + par1_size;
-
- // Establish the string to be checked:
- char *source = PTRCAST(char, (par2->data+par2_offset));
- char *source_end = source + par2_size;
-
- charmap_t *charmap_format = __gg__get_charmap(par1->encoding);
- charmap_t *charmap_checked = __gg__get_charmap(par2->encoding);
- int checked_space = charmap_checked->mapped_character(ascii_space);
- int source_plus = charmap_checked->mapped_character(ascii_plus);
- int source_minus = charmap_checked->mapped_character(ascii_minus);
- int source_zero = charmap_checked->mapped_character(ascii_zero);
-
- int format_space = charmap_format->mapped_character(ascii_space);
- int format_T = charmap_format->mapped_character(ascii_T );
- int format_colon = charmap_format->mapped_character(ascii_colon );
- int format_plus = charmap_format->mapped_character(ascii_plus );
- int format_minus = charmap_format->mapped_character(ascii_minus );
- int format_W = charmap_format->mapped_character(ascii_W );
- int format_Z = charmap_format->mapped_character(ascii_Z );
- int format_z = charmap_format->mapped_character(ascii_z );
- int format_s = charmap_format->mapped_character(ascii_s );
- int format_m = charmap_format->mapped_character(ascii_m );
- int format_h = charmap_format->mapped_character(ascii_h );
- int format_w = charmap_format->mapped_character(ascii_w );
- int format_Y = charmap_format->mapped_character(ascii_Y );
- int format_M = charmap_format->mapped_character(ascii_M );
- int format_D = charmap_format->mapped_character(ascii_D );
- int format_zero = charmap_format->mapped_character(ascii_zero );
- char decimal_point
- = charmap_format->mapped_character(__gg__get_decimal_point());
+ // It turns out to be just easier to convert the strings to ASCII space to
+ // do the conversion of par2 against the format in par1:
+ charmap_t *charmap = __gg__get_charmap(DEFAULT_SOURCE_ENCODING);
+
+ size_t bytes_converted;
+ char *par1_c = __gg__miconverter(par1->encoding,
+ DEFAULT_SOURCE_ENCODING,
+ par1->data+par1_offset,
+ par1_size,
+ &bytes_converted);
+ char *format = par1_c;
+ char *format_end = format + bytes_converted;
+
+ char *par2_c = __gg__miconverter(par2->encoding,
+ DEFAULT_SOURCE_ENCODING,
+ par2->data+par2_offset,
+ par2_size,
+ &bytes_converted);
+ char *source = par2_c;
+ char *source_end = source + bytes_converted;
+
+ char decimal_point = __gg__get_decimal_point();
// Let's eliminate trailing spaces...
- trim_trailing_spaces(format, format_end, format_space);
- trim_trailing_spaces(source, source_end, checked_space);
+ trim_trailing_spaces(format, format_end, ascii_space);
+ trim_trailing_spaces(source, source_end, ascii_space);
bool in_offset = false;
bool in_nanoseconds = false;
{
unsigned char ch = *format;
- if( ch == format_T
- || ch == format_colon
- || ch == format_minus
- || ch == format_W)
+ if( ch == ascii_T
+ || ch == ascii_colon
+ || ch == ascii_minus
+ || ch == ascii_W)
{
// These are just formatting characters. They need to be duplicated,
// but are otherwise ignored.
goto proceed;
}
- if( ch == format_plus )
+ if( ch == ascii_plus )
{
// This flags a following hhmm offset. It needs to match a '+' or '-'
- if( (unsigned char)*source != source_plus
- && (unsigned char)*source != source_minus
- && (unsigned char)*source != source_zero)
+ if( (unsigned char)*source != ascii_plus
+ && (unsigned char)*source != ascii_minus
+ && (unsigned char)*source != ascii_zero)
{
break;
}
- if( (unsigned char)*source == format_zero )
+ if( (unsigned char)*source == ascii_zero )
{
// The next four characters have to be zeroes
- if( (unsigned char)source[1] != format_zero )
+ if( (unsigned char)source[1] != ascii_zero )
{
retval += 1;
break;
}
- if( (unsigned char)source[2] != format_zero )
+ if( (unsigned char)source[2] != ascii_zero )
{
retval += 2;
break;
}
- if( (unsigned char)source[3] != format_zero )
+ if( (unsigned char)source[3] != ascii_zero )
{
retval += 3;
break;
}
- if( (unsigned char)source[4] != format_zero )
+ if( (unsigned char)source[4] != ascii_zero )
{
retval += 4;
break;
goto proceed;
}
- if( ch == format_Y )
+ if( ch == ascii_Y )
{
- errpos = gets_year(source, source_end, charmap_checked, ctm);
+ errpos = gets_year(source, source_end, charmap, ctm);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_M )
+ if( ch == ascii_M )
{
- errpos = gets_month(source, source_end, charmap_checked, ctm);
+ errpos = gets_month(source, source_end, charmap, ctm);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_D )
+ if( ch == ascii_D )
{
// We have three possibilities: DDD, DD, and D
- if( (unsigned char)format[1] != format_D )
+ if( (unsigned char)format[1] != ascii_D )
{
// A singleton 'D' is a day-of-week
- errpos = gets_day_of_week(source, source_end, charmap_checked, ctm);
+ errpos = gets_day_of_week(source, source_end, charmap, ctm);
if( errpos > 0)
{
retval += errpos - 1;
}
bump = 1;
}
- else if( (unsigned char)format[2] != format_D )
+ else if( (unsigned char)format[2] != ascii_D )
{
// This is DD, for day-of-month
- errpos = gets_day(source, source_end, charmap_checked, ctm);
+ errpos = gets_day(source, source_end, charmap, ctm);
if( errpos > 0)
{
retval += errpos - 1;
{
// Arriving here means that it is DDD, for day-of-year
// This is DD, for day-of-month
- errpos = gets_day_of_year(source, source_end, charmap_checked, ctm);
+ errpos = gets_day_of_year(source, source_end, charmap, ctm);
if( errpos > 0)
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_w )
+ if( ch == ascii_w )
{
- errpos = gets_week(source, source_end, charmap_checked, ctm);
+ errpos = gets_week(source, source_end, charmap, ctm);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_h )
+ if( ch == ascii_h )
{
- errpos = gets_hours(source, source_end, charmap_checked, ctm, in_offset);
+ errpos = gets_hours(source, source_end, charmap, ctm, in_offset);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_m )
+ if( ch == ascii_m )
{
- errpos = gets_minutes(source, source_end, charmap_checked, ctm, in_offset);
+ errpos = gets_minutes(source, source_end, charmap, ctm, in_offset);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_s && !in_nanoseconds )
+ if( ch == ascii_s && !in_nanoseconds )
{
- errpos = gets_seconds(source, source_end, charmap_checked, ctm);
+ errpos = gets_seconds(source, source_end, charmap, ctm);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_s && in_nanoseconds )
+ if( ch == ascii_s && in_nanoseconds )
{
// Peel off digits to the right of the decimal point one at a time
errpos = gets_nanoseconds(format,
source,
source_end,
ctm,
- charmap_format,
- charmap_checked);
+ charmap,
+ charmap);
if( errpos > 0 )
{
retval += errpos - 1;
goto proceed;
}
- if( ch == format_Z || ch == format_z )
+ if( ch == ascii_Z || ch == ascii_z )
{
// This has to be the end of the road
- if( (unsigned char)source[0] != format_Z
- && (unsigned char)source[0] != format_z )
+ if( (unsigned char)source[0] != ascii_Z
+ && (unsigned char)source[0] != ascii_z )
{
retval += 0;
break;
{
// This means we processed the entire format string without seeing an error
retval = 0;
-
// Otherwise, either the format or source was too short
}
+ free(par1_c);
+ free(par2_c);
return retval;
}
}
else
{
- retval = (double)(ctm.hh * 3600 + ctm.mm * 60 + ctm.ss) + ctm.nanoseconds/1000000000.;
+ retval = (double)(ctm.hh * 3600 + ctm.mm * 60 + ctm.ss)
+ + ctm.nanoseconds/1000000000.;
}
__gg__double_to_target( dest,
retval,
size_t field_offset,
size_t field_size)
{
- charmap_t *charmap = __gg__get_charmap(dest->encoding);
+ // We are going to build the hex string up here, in ascii, and convert to the
+ // the destination encoding at the end.
+
static const char hex[17] = "0123456789ABCDEF";
- size_t bytes = field_size;
- __gg__adjust_dest_size(dest, 2*bytes);
- for(size_t i=0; i<bytes; i++)
+
+ // Dest size is two hex characters per input byte.
+ size_t build_size = 2 * field_size;
+
+ // Build up the hex string in ascii:
+ char *build = static_cast<char *>(malloc(build_size));
+ massert(build);
+ for(size_t i=0; i<field_size; i++)
{
unsigned char byte = (field->data+field_offset)[i];
- dest->data[2*i ] = charmap->mapped_character(hex[byte>>4]);
- dest->data[2*i+1] = charmap->mapped_character(hex[byte&0xF]);
+ build[2*i ] = hex[byte>>4];
+ build[2*i+1] = hex[byte&0xF];
}
+ // Convert the hex string to the destination encoding:
+ size_t converted_bytes;
+ const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+ dest->encoding,
+ build,
+ build_size,
+ &converted_bytes);
+ // And put it into place:
+ __gg__adjust_dest_size(dest, converted_bytes);
+ memcpy(dest->data, converted, converted_bytes);
+ free(build);
}
extern "C"
&nbytes);
GCOB_FP128 value = 0;
const char *data = converted;
- const char *data_end = data + source_size;
+ const char *data_end = data + nbytes;
int error = floating_format_tester( data,
data_end);
- if( error || source_size >= 256 )
+ if( error || nbytes >= 256 )
{
exception_raise(ec_argument_function_e);
}
&nbytes);
const char *data = converted;
- const char *data_end = data + source_size;
+ const char *data_end = data + nbytes;
int error = floating_format_tester( data,
data_end);
__gg__int128_to_field(dest,
strcpy(ach, nl_langinfo(D_FMT));
strftime(ach, sizeof(ach), nl_langinfo(D_FMT), &tm);
}
- __gg__convert_encoding(ach,
- DEFAULT_SOURCE_ENCODING,
- dest->encoding);
- memcpy(dest->data, ach, strlen(ach));
- __gg__adjust_dest_size(dest, strlen(ach));
+ size_t bytes_converted;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ dest->encoding,
+ ach,
+ strlen(ach),
+ &bytes_converted);
+ __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+ __gg__adjust_dest_size(dest, bytes_converted);
+ free(converted);
}
extern "C"
strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
}
- __gg__convert_encoding(ach,
- DEFAULT_SOURCE_ENCODING,
- dest->encoding);
- memcpy(dest->data, ach, strlen(ach));
- __gg__adjust_dest_size(dest, strlen(ach));
+ size_t bytes_converted;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ dest->encoding,
+ ach,
+ strlen(ach),
+ &bytes_converted);
+ __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+ __gg__adjust_dest_size(dest, bytes_converted);
+ free(converted);
}
extern "C"
tm.tm_sec = seconds % 100;
strftime(ach, sizeof(ach), nl_langinfo(T_FMT), &tm);
}
-
- __gg__convert_encoding(ach,
- DEFAULT_SOURCE_ENCODING,
- dest->encoding);
- memcpy(dest->data, ach, strlen(ach));
- __gg__adjust_dest_size(dest, strlen(ach));
+ size_t bytes_converted;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ dest->encoding,
+ ach,
+ strlen(ach),
+ &bytes_converted);
+ __gg__field_from_string(dest, 0, dest->capacity, converted, bytes_converted);
+ __gg__adjust_dest_size(dest, bytes_converted);
+ free(converted);
}
#include <string>
#include <unordered_map>
#include <vector>
+#include <cwctype>
#include <dirent.h>
#include <dlfcn.h>
#include "exceptl.h"
#include "stringbin.h"
+#define NO_RDIGITS (0)
+
+// Forward reference:
+extern "C"
+int
+__gg__move( cblc_field_t *fdest,
+ size_t dest_offset,
+ size_t dest_size,
+ cblc_field_t *fsource,
+ size_t source_offset,
+ size_t source_size,
+ int source_flags,
+ cbl_round_t rounded );
+
/* BSD extension. */
#if !defined(LOG_PERROR)
int __gg__nop = 0 ;
int __gg__main_called = 0 ;
void *__gg__entry_label = NULL ;
-cbl_encoding_t __gg__console_encoding = no_encoding_e ;
// During SORT operations, we don't want the end-of-file condition, which
// happens as a matter of course, from setting the EOF exception condition.
// nested PERFORM PROC statements.
void *__gg__exit_address = NULL;
+// This is the encoding used for sorting tables and files
+static cbl_encoding_t encoding_for_sort;
+
/*
* ec_status_t represents the runtime exception condition status for
* any statement. There are 4 states:
cbl_encoding_t rt_display_encoding;
cbl_encoding_t rt_national_encoding;
char *rt_program_name;
+ cbl_char_t rt_working_init;
+ cbl_char_t rt_local_init;
program_state() : rt_currency_signs(256)
{
rt_quote_character = ascii_dquote ; // Change this with APOST
rt_low_value_character = DEGENERATE_LOW_VALUE ;
rt_high_value_character = DEGENERATE_HIGH_VALUE ;
+ rt_working_init = NOT_A_CHARACTER ;
+ rt_local_init = NOT_A_CHARACTER ;
// Set all the currency_sign pointers to NULL:
rt_national_encoding = ps.rt_national_encoding ;
rt_collation = ps.rt_collation ;
rt_program_name = ps.rt_program_name ;
+ rt_working_init = ps.rt_working_init ;
+ rt_local_init = ps.rt_local_init ;
}
};
__gg__display_encoding = program_states.back().rt_display_encoding ;
__gg__national_encoding = program_states.back().rt_national_encoding ;
__gg__currency_signs = program_states.back().rt_currency_signs ;
- }
-
-static
-int
-cstrncmp( char const * const left_,
- char const * const right_,
- size_t count)
- {
- const char *left = left_;
- const char *right = right_;
- // This is the version of strncmp() that uses the current collation
+ __gg__working_init = program_states.back().rt_working_init ;
+ __gg__local_init = program_states.back().rt_local_init ;
- // It also is designed to handle strings with embedded NUL characters, so
- // it treats NULs like any other characters.
- int retval = 0;
- while( count-- )
- {
- unsigned char chl = *left++;
- unsigned char chr = *right++;
- retval = chl - chr;
- if( retval )
- {
- break;
- }
- }
- return retval;
}
extern "C"
__gg__decimal_separator = ascii_period ;
}
-extern "C"
-void
-__gg__init_program_state(cbl_encoding_t display_encoding,
- cbl_encoding_t national_encoding)
+static __int128
+edited_to_binary( const cblc_field_t *field,
+ char *ps_,
+ int length,
+ int *rdigits)
{
- // This routine gets called at DATA DIVISION time.
+ charmap_t *charmap = __gg__get_charmap(field->encoding);
- __gg__display_encoding = display_encoding;
- __gg__national_encoding = national_encoding;
+ const unsigned char *ps = const_cast<const unsigned char *>(PTRCAST(unsigned char, ps_));
+ // This routine is used for converting NumericEdited strings to
+ // binary.
- // We need to make sure that the program_states vector has at least one
- // entry in it. This happens when we are the very first PROGRAM-ID called
- // in this module.
- if( program_states.empty() )
- {
- initialize_program_state();
- }
- }
+ // Numeric edited strings can have all kinds of crap in them: spaces,
+ // slashes, dollar signs...you name it. It might have a minus sign at
+ // the beginning or end, or it might have CR or DB at the end.
-static int
-var_is_refmod( const cblc_field_t *var )
- {
- return (var->attr & refmod_e) != 0;
- }
+ // We are going to look for a minus sign, D (or d) and use that to flag the
+ // result as negative. We are going to look for a decimal point and count up
+ // the numerical digits to the right of it. And we are going to pretend
+ // that nothing else matters.
-extern "C"
-__int128
-__gg__power_of_ten(int n)
- {
- // 2** 64 = 1.8E19
- // 2**128 = 3.4E38
- __int128 retval = 1;
- static const int MAX_POWER = 19 ;
- static const __int128 pos[MAX_POWER+1] =
- {
- 1ULL, // 00
- 10ULL, // 01
- 100ULL, // 02
- 1000ULL, // 03
- 10000ULL, // 04
- 100000ULL, // 05
- 1000000ULL, // 06
- 10000000ULL, // 07
- 100000000ULL, // 08
- 1000000000ULL, // 09
- 10000000000ULL, // 10
- 100000000000ULL, // 11
- 1000000000000ULL, // 12
- 10000000000000ULL, // 13
- 100000000000000ULL, // 14
- 1000000000000000ULL, // 15
- 10000000000000000ULL, // 16
- 100000000000000000ULL, // 17
- 1000000000000000000ULL, // 18
- 10000000000000000000ULL, // 19
- };
- if( n < 0 || n>MAX_POWER*2) // The most we can handle is 10**38
- {
- fprintf(stderr,
- "Trying to raise 10 to %d as an int128, which we can't do.\n",
- n);
- fprintf(stderr, "The problem is in %s %s:%d.\n", __func__, __FILE__, __LINE__);
- abort();
- }
- if( n <= MAX_POWER )
+ int hyphen = 0;
+ *rdigits = 0;
+
+ // index into the ps string
+ int index = 0;
+
+ // Create a delta_r for counting digits to the right of
+ // any decimal point. If and when we encounter a decimal point,
+ // we'll set this to one, otherwise it'll stay zero.
+ int delta_r = 0;
+
+ __int128 result = 0;
+
+ // We need to check the last two characters. If CR or DB, then the result
+ // is negative:
+ if( length >= 2)
{
- // Up to 10**18 we do directly:
- retval = pos[n];
+ if( ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_D)
+ || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_d))
+ && ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_B)
+ || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_b)) )
+ {
+ hyphen = 1;
+ }
+ else if( ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_C)
+ || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_c))
+ && ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_R)
+ || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_r)) )
+ {
+ hyphen = 1;
+ }
}
- else
+
+ while( index < length )
{
- // 19 through 38:
- retval = pos[n/2];
- retval *= retval;
- if( n & 1 )
+ unsigned char ch = ps[index++] & 0xFF;
+ if( ch == charmap->mapped_character(__gg__decimal_point) )
{
- retval *= 10;
+ delta_r = 1;
+ continue;
+ }
+ if( ch == charmap->mapped_character(ascii_minus) )
+ {
+ hyphen = 1;
+ continue;
+ }
+
+ if( charmap->mapped_character(ascii_0) <= ch
+ && ch <= charmap->mapped_character(ascii_9) )
+ {
+ result *= 10;
+ // In both EBCDIC and ASCII, this works:
+ result += ch & 0x0F ;
+ *rdigits += delta_r ;
+ continue;
}
}
- return retval;
+
+ if( hyphen )
+ {
+ result = -result;
+ }
+ return result;
}
-extern "C"
+static
__int128
-__gg__scale_by_power_of_ten_1(__int128 value, int N)
+big_endian_to_binary_signed(
+ const unsigned char *psource,
+ int capacity
+)
{
- // This routine is called when the result of the scaling is not allowed to
- // have non-zero rdigits. __gg__rdigits is set to 1 when the result is
- // in the bad zone. The ultimate caller needs to examine __gg__rdigits to
- // decide what to do about it.
+ // This subroutine takes a big-endian value of "capacity" bytes and
+ // converts it to a signed INT128. The highest order bit of the big-endian
+ // value determines whether or not the highest-order bits of the INT128
+ // return value are off or on.
- // This is a separate routine because of the performance hit caused by the
- // value % pot operation, which is needed only when certain EC checking is
- // turned on.
- if( N > 0 )
+ __int128 retval;
+ if( *psource >= 128 )
{
- __gg__rdigits = 0;
- value *= __gg__power_of_ten(N);
+ retval = -1;
}
- else if( N < 0)
+ else
{
- // We throwing away the N rightmost digits. Use __gg__rdigits
- // to let the calling chain know they were non-zero:
- __int128 pot = __gg__power_of_ten(-N);
- if( value % pot)
- {
- __gg__rdigits = 1;
- }
- else
- {
- __gg__rdigits = 0;
- }
-
- value /= pot;
+ retval = 0;
}
- else
+
+ // move the bytes of psource into retval, flipping them end-to-end
+ unsigned char *dest = PTRCAST(unsigned char, &retval);
+ while(capacity > 0)
{
- // N is zero
- __gg__rdigits = 0;
+ *dest++ = psource[--capacity];
}
- return value;
+ return retval;
}
-extern "C"
+static
__int128
-__gg__scale_by_power_of_ten_2(__int128 value, int N)
+little_endian_to_binary_signed(
+ const unsigned char *psource,
+ int capacity
+)
{
- if( N > 0 )
+ // This subroutine takes a little-endian value of "capacity" bytes and
+ // converts it to a signed INT128. The highest order bit of the little-endian
+ // value determines whether or not the highest-order bits of the INT128
+ // return value are off or on.
+
+ __int128 result;
+
+ // Set all the bits of the result based on the sign of the source:
+ if( psource[capacity-1] >= 128 )
{
- value *= __gg__power_of_ten(N);
+ result = -1;
}
- else if( N < 0)
+ else
{
- value /= __gg__power_of_ten(-N);
+ result = 0;
}
- return value;
+
+ // Copy the low-order bytes into place:
+ memcpy(&result, psource, capacity);
+ return result;
}
-static bool
-value_is_too_big(const cblc_field_t *var,
- __int128 value,
- int source_rdigits)
+static
+__int128
+little_endian_to_binary_unsigned(
+ const unsigned char *psource,
+ int capacity
+)
{
- // This routine is in support of arithmetic ON SIZE ERROR. It returns
- // TRUE if var hasn't enough bytes to hold the decimal representation
- // of value:
- bool retval = false;
-
- if( !(var->attr & intermediate_e) )
- {
- if( value < 0 )
- {
- value = -value;
- }
- if( var->digits )
- {
- // I don't know how to describe this calculation. I came up with the
- // equation by working a few examples. For instance, if value is 12345 and
- // source_rdigits is two, then we are trying to cram 123.45 into 99v99999
- // and we have a size error. So, digits is 7, rdigits is 5 and source_rdigits
- // 2. That means we compare 12345 with 10^(7 - 5 + 2), which is 12345 versus
- // 10000, which is too big, which means we have a size error.
- retval =
- value >= __gg__power_of_ten( var->digits - var->rdigits + source_rdigits);
- }
- else
- {
- // var->digits is zero. We are dealing with a binary-style number that
- // fills the whole of the value
- if( !( var->type == FldNumericBin5
- || var->type == FldPointer
- || var->type == FldIndex) )
- {
- __gg__abort("value_is_too_big() was given a type it doesn't know about");
- }
- if( var->capacity < 16 )
- {
- __int128 max_possible = 1;
- max_possible = max_possible << (var->capacity * 8);
- retval = value >= max_possible;
- }
- }
- }
+ __int128 result = 0;
- return retval;
+ // Copy the low-order bytes into place:
+ memcpy(&result, psource, capacity);
+ return result;
}
-static void
-binary_to_big_endian( unsigned char *dest,
- int bytes,
- __int128 value
- )
+static
+__int128
+big_endian_to_binary_unsigned(
+ const unsigned char *psource,
+ int capacity
+)
{
- if( value < 0 )
- {
- memset(dest, 0xFF, bytes);
- }
- else
- {
- memset(dest, 0x00, bytes);
- }
+ // This subroutine takes an unsigned big-endian value of "capacity" bytes and
+ // converts it to an INT128.
- dest += bytes-1;
- while( bytes-- )
- {
- *dest-- = (unsigned char) value;
- value >>= 8;
- }
- }
+ __int128 retval = 0 ;
-static void
-binary_to_little_endian( unsigned char *dest,
- int bytes,
- __int128 value
- )
- {
- if( value < 0 )
- {
- memset(dest, 0xFF, bytes);
- }
- else
+ // move the bytes of psource into retval, flipping them end-to-end
+ unsigned char *dest = PTRCAST(unsigned char, &retval);
+ while(capacity > 0)
{
- memset(dest, 0x00, bytes);
+ *dest++ = psource[--capacity];
}
- memcpy(dest, &value, bytes);
+ return retval;
}
-static __int128
-int128_to_int128_rounded( cbl_round_t rounded,
- __int128 value,
- __int128 factor,
- __int128 remainder,
- int *compute_error)
+static
+__int128
+get_binary_value_local( int *rdigits,
+ const cblc_field_t *resolved_var,
+ unsigned char *resolved_location,
+ size_t resolved_length)
{
- // value is signed, and is scaled to the target
- GCOB_FP128 fpart = ((GCOB_FP128)remainder) / ((GCOB_FP128)factor);
- __int128 retval = value;
+ __int128 retval = 0;
- if(rounded == nearest_even_e
- && fpart != GCOB_FP128_LITERAL (-0.5)
- && fpart != GCOB_FP128_LITERAL (0.5))
+ switch( resolved_var->type )
{
- // "bankers rounding" has been requested.
- //
- // Since the fraction is not 0.5, this is an ordinary rounding
- // problem
- rounded = nearest_away_from_zero_e;
- }
+ case FldLiteralA :
+ fprintf(stderr, "%s(): is trying to handle a FldLiteralA\n", __func__);
+ abort();
+ break;
- switch(rounded)
- {
- case truncation_e:
+ case FldGroup :
+ case FldAlphanumeric :
+ // Read the data area as a dirty string:
+ retval = __gg__dirty_to_binary(
+ PTRCAST(const char, resolved_location),
+ resolved_var->encoding,
+ resolved_length,
+ rdigits );
break;
- case nearest_away_from_zero_e:
+ case FldNumericDisplay:
{
- // This is ordinary rounding, like you learned in grade school
- // 0.0 through 0.4 becomes 0
- // 0.5 through 0.9 becomes 1
- if( value < 0 )
+ *rdigits = resolved_var->rdigits;
+ if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE )
{
- if( fpart <= GCOB_FP128_LITERAL(-0.5) )
- {
- retval -= 1;
- }
+ // This is a degenerate case, which violates the language
+ // specification, but nonetheless seems to be a thing. By
+ // default, HIGH-VALUE is usually assumed to be 0xFF. This is
+ // not necessarily true; HIGH-VALUE can be changed by the
+ // SPECIAL-NAMES ALPHABET clause. Furthermore, by definition,
+ // HIGH-VALUE applies *only* to text literals. However, there
+ // seems to be code out in the universe that wants to be able
+ // to compare NumericDisplay values that have been set to
+ // HIGH-VALUE. Consider, for example, code that reads from
+ // a disk file which sets the input field to HIGH-VALUE upon
+ // an end-of-file condition.
+
+ // This code detects that particular condition, and sets the
+ // resulting binary number to the maximum possible positive
+ // value.
+
+ // Turn all the bits on
+ memset( &retval, 0xFF, sizeof(retval) );
+
+ // Make it positive by turning off the highest order bit:
+ (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F;
}
else
{
- if( fpart >= GCOB_FP128_LITERAL(0.5) )
+ const charmap_t *charmap = __gg__get_charmap(resolved_var->encoding);
+ int stride = charmap->stride();
+ unsigned char *digits;
+ unsigned char *sign_byte_location;
+ int ndigits;
+ if( resolved_var->attr & signable_e )
{
- retval += 1;
+ // Pick up the sign byte, and force our value to be positive
+ if( (resolved_var->attr & separate_e )
+ && (resolved_var->attr & leading_e ) )
+ {
+ // LEADING SEPARATE
+ digits = resolved_location+stride;
+ sign_byte_location = resolved_location;
+ ndigits = resolved_length - stride;
+ }
+ else if( (resolved_var->attr & separate_e)
+ && !(resolved_var->attr & leading_e ) )
+ {
+ // TRAILING SEPARATE
+ digits = resolved_location;
+ sign_byte_location = resolved_location + resolved_length - stride;
+ ndigits = resolved_length - stride;
+ }
+ else if( (resolved_var->attr & leading_e) )
+ {
+ // LEADING
+ digits = resolved_location;
+ sign_byte_location = resolved_location;
+ ndigits = resolved_length;
+ }
+ else // if( !(resolved_var->attr & leading_e) )
+ {
+ // TRAILING
+ digits = resolved_location;
+ sign_byte_location = resolved_location + resolved_length - stride;
+ ndigits = resolved_length;
+ }
+ }
+ else
+ {
+ digits = resolved_location;
+ sign_byte_location = resolved_location;
+ ndigits = resolved_length;
}
+ ndigits /= stride;
+ retval = __gg__numeric_display_to_binary(sign_byte_location,
+ digits,
+ ndigits,
+ resolved_var->encoding);
}
break;
}
- case away_from_zero_e:
- {
- // zero stays zero, otherwise head for the next number away from zero
- if( value < 0 )
+ case FldNumericEdited :
+ retval = edited_to_binary(resolved_var,
+ PTRCAST(char, resolved_location),
+ resolved_length,
+ rdigits);
+ break;
+
+ case FldNumericBinary :
+ if( resolved_var->attr & signable_e)
{
- if( fpart != 0 )
- {
- retval -= 1;
- }
+ retval = big_endian_to_binary_signed(
+ PTRCAST(const unsigned char, resolved_location),
+ resolved_length);
}
else
{
- if( fpart != 0 )
- {
- retval += 1;
- }
+ retval = big_endian_to_binary_unsigned(
+ PTRCAST(const unsigned char, resolved_location),
+ resolved_length);
}
+ *rdigits = resolved_var->rdigits;
break;
- }
- case nearest_toward_zero_e:
+ case FldLiteralN:
{
- // 0.0 through 0.5 becomes 0
- // 0.6 through 0.9 becomes 1
- if( value < 0 )
+ if( resolved_var->attr & signable_e)
{
- if( fpart < GCOB_FP128_LITERAL(-0.5) )
- {
- retval -= 1;
- }
+ retval = little_endian_to_binary_signed(resolved_var->data,
+ resolved_var->capacity);
}
else
{
- if( fpart > GCOB_FP128_LITERAL(0.5) )
- {
- retval += 1;
- }
+ retval = little_endian_to_binary_unsigned(resolved_var->data,
+ resolved_var->capacity);
}
+ *rdigits = resolved_var->rdigits;
break;
}
- case toward_greater_e:
- {
- if( value > 0 )
+ case FldNumericBin5:
+ case FldIndex:
+ case FldPointer:
+ if( resolved_var->attr & signable_e)
{
- if( fpart != 0 )
- {
- retval += 1;
- }
+ retval = little_endian_to_binary_signed(
+ PTRCAST(const unsigned char, resolved_location),
+ resolved_length);
}
- break;
- }
-
- case toward_lesser_e:
- {
- if( value < 0 )
+ else
{
- if(fpart != 0)
- {
- retval -= 1;
- }
+ retval = little_endian_to_binary_unsigned(
+ PTRCAST(const unsigned char, resolved_location),
+ resolved_length);
}
+ *rdigits = resolved_var->rdigits;
break;
- }
- case nearest_even_e:
+ case FldPacked:
{
- // This is "banker's rounding"
- // 3.4 -> 3.0
- // 3.5 -> 4.0
- // 3.6 -> 4.0
-
- // 4.4 -> 4.0
- // 4.5 -> 4.0
- // 4.6 -> 5.0
+ *rdigits = resolved_var->rdigits;
+ retval = __gg__packed_to_binary(resolved_location,
+ resolved_length);
+ break;
+ }
+ }
- // We know that the fractional part is 0.5 or -0.5, and we know that
- // we want 3 to become 4 and for 4 to stay 4.
+ if( resolved_var->attr & scaled_e )
+ {
+ // Here's where we handle a P-scaled number.
- if( value < 0 )
+ if( resolved_var->rdigits >= 0)
{
- if( retval & 1 )
- {
- retval -= 1;
- }
+ // We might be dealing with a source with a PICTURE string of
+ // PPPPPP999, which means retval is a three-digit number
+ // and resolved_var->rdigits is +6. That means we need to divide retval
+ // by 10**9, and we need to make rdigits 9
+ *rdigits = resolved_var->digits + resolved_var->rdigits;
}
else
{
- if( retval & 1 )
- {
- retval += 1;
- }
- }
- break;
+ // We have a source with a PIC string like 999PPPPPP, which is
+ // a capacity of 3 and a resolved_var->rdigits of -6. We need to multiply
+ // retval by +6, and make rdigits zero:
+ retval *= __gg__power_of_ten( -resolved_var->rdigits );
+ *rdigits = 0;
}
+ }
- case prohibited_e:
- {
- if( fpart != 0 )
- {
- *compute_error |= compute_error_truncate;
- }
+ return retval;
+ }
- break;
+static uint32_t
+get_init_value(cblc_field_t *field)
+ {
+ uint32_t retval = 0;
+ cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+ if( figconst )
+ {
+ switch(figconst)
+ {
+ case normal_value_e :
+ // This is not possible, it says here in the fine print.
+ abort();
+ break;
+ case low_value_e :
+ retval = __gg__low_value_character;
+ break;
+ case zero_value_e :
+ retval = ascii_zero;
+ break;
+ case space_value_e :
+ retval = ascii_space;
+ break;
+ case quote_value_e :
+ retval = __gg__quote_character;
+ break;
+ case high_value_e :
+ retval = __gg__high_value_character;
+ break;
+ case null_value_e:
+ retval = 0x00;
+ break;
}
-
- default:
- abort();
- break;
+ }
+ else
+ {
+ int rdigits;
+ retval = get_binary_value_local(&rdigits,
+ field,
+ field->data,
+ field->capacity
+ );
}
return retval;
}
-static __int128
-f128_to_i128_rounded( cbl_round_t rounded,
- GCOB_FP128 value,
- int *compute_error)
+extern "C"
+void __gg__initialization_values( uint32_t wsclear,
+ cblc_field_t *working_init,
+ cblc_field_t *local_init)
{
- // value is signed, and is scaled to the target
- GCOB_FP128 ipart;
- GCOB_FP128 fpart = FP128_FUNC(modf)(value, &ipart);
- __int128 retval = (__int128)ipart;
+ __gg__wsclear = wsclear;
+ __gg__working_init = NOT_A_CHARACTER;
+ __gg__local_init = NOT_A_CHARACTER;
- if(rounded == nearest_even_e
- && fpart != GCOB_FP128_LITERAL (-0.5)
- && fpart != GCOB_FP128_LITERAL (0.5))
+ if( working_init )
{
- // "bankers rounding" has been requested.
- //
- // Since the fraction is not 0.5, this is an ordinary rounding
- // problem
- rounded = nearest_away_from_zero_e;
+ __gg__working_init = get_init_value(working_init);
+ }
+ if( local_init )
+ {
+ __gg__local_init = get_init_value(local_init);
}
+ program_states.back().rt_working_init = __gg__working_init;
+ program_states.back().rt_local_init = __gg__local_init;
+ }
- switch(rounded)
+extern "C"
+void
+__gg__init_program_state(cbl_encoding_t display_encoding,
+ cbl_encoding_t national_encoding)
+ {
+ // This routine gets called at DATA DIVISION time.
+
+ __gg__display_encoding = display_encoding;
+ __gg__national_encoding = national_encoding;
+
+ // We need to make sure that the program_states vector has at least one
+ // entry in it. This happens when we are the very first PROGRAM-ID called
+ // in this module.
+ if( program_states.empty() )
{
- case truncation_e:
- break;
+ initialize_program_state();
+ }
+ }
- case nearest_away_from_zero_e:
- {
- // This is ordinary rounding, like you learned in grade school
- // 0.0 through 0.4 becomes 0
- // 0.5 through 0.9 becomes 1
- if( value < 0 )
- {
- if( fpart <= GCOB_FP128_LITERAL (-0.5) )
- {
- retval -= 1;
- }
- }
- else
- {
- if( fpart >= GCOB_FP128_LITERAL (0.5) )
- {
- retval += 1;
- }
- }
- break;
- }
+static int
+var_is_refmod( const cblc_field_t *var )
+ {
+ return (var->attr & refmod_e) != 0;
+ }
- case away_from_zero_e:
+extern "C"
+__int128
+__gg__power_of_ten(int n)
+ {
+ // 2** 64 = 1.8E19
+ // 2**128 = 3.4E38
+ __int128 retval = 1;
+ static const int MAX_POWER = 19 ;
+ static const __int128 pos[MAX_POWER+1] =
+ {
+ 1ULL, // 00
+ 10ULL, // 01
+ 100ULL, // 02
+ 1000ULL, // 03
+ 10000ULL, // 04
+ 100000ULL, // 05
+ 1000000ULL, // 06
+ 10000000ULL, // 07
+ 100000000ULL, // 08
+ 1000000000ULL, // 09
+ 10000000000ULL, // 10
+ 100000000000ULL, // 11
+ 1000000000000ULL, // 12
+ 10000000000000ULL, // 13
+ 100000000000000ULL, // 14
+ 1000000000000000ULL, // 15
+ 10000000000000000ULL, // 16
+ 100000000000000000ULL, // 17
+ 1000000000000000000ULL, // 18
+ 10000000000000000000ULL, // 19
+ };
+ if( n < 0 || n>MAX_POWER*2) // The most we can handle is 10**38
+ {
+ fprintf(stderr,
+ "Trying to raise 10 to %d as an int128, which we can't do.\n",
+ n);
+ fprintf(stderr, "The problem is in %s %s:%d.\n", __func__, __FILE__, __LINE__);
+ abort();
+ }
+ if( n <= MAX_POWER )
+ {
+ // Up to 10**18 we do directly:
+ retval = pos[n];
+ }
+ else
+ {
+ // 19 through 38:
+ retval = pos[n/2];
+ retval *= retval;
+ if( n & 1 )
{
- // zero stays zero, otherwise head for the next number away from zero
- if( value < 0 )
- {
- if( fpart != 0 )
- {
- retval -= 1;
- }
- }
- else
- {
- if( fpart != 0 )
- {
- retval += 1;
- }
- }
- break;
+ retval *= 10;
}
+ }
+ return retval;
+ }
- case nearest_toward_zero_e:
- {
- // 0.0 through 0.5 becomes 0
- // 0.6 through 0.9 becomes 1
- if( value < 0 )
- {
- if( fpart < GCOB_FP128_LITERAL (-0.5) )
- {
- retval -= 1;
- }
- }
- else
- {
- if( fpart > GCOB_FP128_LITERAL (0.5) )
- {
- retval += 1;
- }
+extern "C"
+__int128
+__gg__scale_by_power_of_ten_1(__int128 value, int N)
+ {
+ // This routine is called when the result of the scaling is not allowed to
+ // have non-zero rdigits. __gg__rdigits is set to 1 when the result is
+ // in the bad zone. The ultimate caller needs to examine __gg__rdigits to
+ // decide what to do about it.
+
+ // This is a separate routine because of the performance hit caused by the
+ // value % pot operation, which is needed only when certain EC checking is
+ // turned on.
+ if( N > 0 )
+ {
+ __gg__rdigits = 0;
+ value *= __gg__power_of_ten(N);
+ }
+ else if( N < 0)
+ {
+ // We throwing away the N rightmost digits. Use __gg__rdigits
+ // to let the calling chain know they were non-zero:
+ __int128 pot = __gg__power_of_ten(-N);
+ if( value % pot)
+ {
+ __gg__rdigits = 1;
+ }
+ else
+ {
+ __gg__rdigits = 0;
+ }
+
+ value /= pot;
+ }
+ else
+ {
+ // N is zero
+ __gg__rdigits = 0;
+ }
+ return value;
+ }
+
+extern "C"
+__int128
+__gg__scale_by_power_of_ten_2(__int128 value, int N)
+ {
+ if( N > 0 )
+ {
+ value *= __gg__power_of_ten(N);
+ }
+ else if( N < 0)
+ {
+ value /= __gg__power_of_ten(-N);
+ }
+ return value;
+ }
+
+static bool
+value_is_too_big(const cblc_field_t *var,
+ __int128 value,
+ int source_rdigits)
+ {
+ // This routine is in support of arithmetic ON SIZE ERROR. It returns
+ // TRUE if var hasn't enough bytes to hold the decimal representation
+ // of value:
+ bool retval = false;
+
+ if( !(var->attr & intermediate_e) )
+ {
+ if( value < 0 )
+ {
+ value = -value;
+ }
+ if( var->digits )
+ {
+ // I don't know how to describe this calculation. I came up with the
+ // equation by working a few examples. For instance, if value is 12345 and
+ // source_rdigits is two, then we are trying to cram 123.45 into 99v99999
+ // and we have a size error. So, digits is 7, rdigits is 5 and source_rdigits
+ // 2. That means we compare 12345 with 10^(7 - 5 + 2), which is 12345 versus
+ // 10000, which is too big, which means we have a size error.
+ retval =
+ value >= __gg__power_of_ten( var->digits - var->rdigits + source_rdigits);
+ }
+ else
+ {
+ // var->digits is zero. We are dealing with a binary-style number that
+ // fills the whole of the value
+ if( !( var->type == FldNumericBin5
+ || var->type == FldPointer
+ || var->type == FldIndex) )
+ {
+ __gg__abort("value_is_too_big() was given a type it doesn't know about");
+ }
+ if( var->capacity < 16 )
+ {
+ __int128 max_possible = 1;
+ max_possible = max_possible << (var->capacity * 8);
+ retval = value >= max_possible;
+ }
+ }
+ }
+
+ return retval;
+ }
+
+static void
+binary_to_big_endian( unsigned char *dest,
+ int bytes,
+ __int128 value
+ )
+ {
+ if( value < 0 )
+ {
+ memset(dest, 0xFF, bytes);
+ }
+ else
+ {
+ memset(dest, 0x00, bytes);
+ }
+
+ dest += bytes-1;
+ while( bytes-- )
+ {
+ *dest-- = (unsigned char) value;
+ value >>= 8;
+ }
+ }
+
+static void
+binary_to_little_endian( unsigned char *dest,
+ int bytes,
+ __int128 value
+ )
+ {
+ if( value < 0 )
+ {
+ memset(dest, 0xFF, bytes);
+ }
+ else
+ {
+ memset(dest, 0x00, bytes);
+ }
+ memcpy(dest, &value, bytes);
+ }
+
+static __int128
+int128_to_int128_rounded( cbl_round_t rounded,
+ __int128 value,
+ __int128 factor,
+ __int128 remainder,
+ int *compute_error)
+ {
+ // value is signed, and is scaled to the target
+ GCOB_FP128 fpart = ((GCOB_FP128)remainder) / ((GCOB_FP128)factor);
+ __int128 retval = value;
+
+ if(rounded == nearest_even_e
+ && fpart != GCOB_FP128_LITERAL (-0.5)
+ && fpart != GCOB_FP128_LITERAL (0.5))
+ {
+ // "bankers rounding" has been requested.
+ //
+ // Since the fraction is not 0.5, this is an ordinary rounding
+ // problem
+ rounded = nearest_away_from_zero_e;
+ }
+
+ switch(rounded)
+ {
+ case truncation_e:
+ break;
+
+ case nearest_away_from_zero_e:
+ {
+ // This is ordinary rounding, like you learned in grade school
+ // 0.0 through 0.4 becomes 0
+ // 0.5 through 0.9 becomes 1
+ if( value < 0 )
+ {
+ if( fpart <= GCOB_FP128_LITERAL(-0.5) )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( fpart >= GCOB_FP128_LITERAL(0.5) )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case away_from_zero_e:
+ {
+ // zero stays zero, otherwise head for the next number away from zero
+ if( value < 0 )
+ {
+ if( fpart != 0 )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( fpart != 0 )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case nearest_toward_zero_e:
+ {
+ // 0.0 through 0.5 becomes 0
+ // 0.6 through 0.9 becomes 1
+ if( value < 0 )
+ {
+ if( fpart < GCOB_FP128_LITERAL(-0.5) )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( fpart > GCOB_FP128_LITERAL(0.5) )
+ {
+ retval += 1;
+ }
}
break;
}
return retval;
}
-static void
-int128_to_field(cblc_field_t *var,
- unsigned char *location,
- size_t length,
- __int128 value,
- int source_rdigits,
- enum cbl_round_t rounded,
- int *compute_error)
+static __int128
+f128_to_i128_rounded( cbl_round_t rounded,
+ GCOB_FP128 value,
+ int *compute_error)
{
- // This routine takes a numerical value, and scales and converts it to the
- // target field type.
-
- // It operates in the source codeset space, and converts the final result
- // to the native codeset space
+ // value is signed, and is scaled to the target
+ GCOB_FP128 ipart;
+ GCOB_FP128 fpart = FP128_FUNC(modf)(value, &ipart);
+ __int128 retval = (__int128)ipart;
- switch( var->type )
+ if(rounded == nearest_even_e
+ && fpart != GCOB_FP128_LITERAL (-0.5)
+ && fpart != GCOB_FP128_LITERAL (0.5))
+ {
+ // "bankers rounding" has been requested.
+ //
+ // Since the fraction is not 0.5, this is an ordinary rounding
+ // problem
+ rounded = nearest_away_from_zero_e;
+ }
+
+ switch(rounded)
+ {
+ case truncation_e:
+ break;
+
+ case nearest_away_from_zero_e:
+ {
+ // This is ordinary rounding, like you learned in grade school
+ // 0.0 through 0.4 becomes 0
+ // 0.5 through 0.9 becomes 1
+ if( value < 0 )
+ {
+ if( fpart <= GCOB_FP128_LITERAL (-0.5) )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( fpart >= GCOB_FP128_LITERAL (0.5) )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case away_from_zero_e:
+ {
+ // zero stays zero, otherwise head for the next number away from zero
+ if( value < 0 )
+ {
+ if( fpart != 0 )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( fpart != 0 )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case nearest_toward_zero_e:
+ {
+ // 0.0 through 0.5 becomes 0
+ // 0.6 through 0.9 becomes 1
+ if( value < 0 )
+ {
+ if( fpart < GCOB_FP128_LITERAL (-0.5) )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( fpart > GCOB_FP128_LITERAL (0.5) )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case toward_greater_e:
+ {
+ if( value > 0 )
+ {
+ if( fpart != 0 )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case toward_lesser_e:
+ {
+ if( value < 0 )
+ {
+ if(fpart != 0)
+ {
+ retval -= 1;
+ }
+ }
+ break;
+ }
+
+ case nearest_even_e:
+ {
+ // This is "banker's rounding"
+ // 3.4 -> 3.0
+ // 3.5 -> 4.0
+ // 3.6 -> 4.0
+
+ // 4.4 -> 4.0
+ // 4.5 -> 4.0
+ // 4.6 -> 5.0
+
+ // We know that the fractional part is 0.5 or -0.5, and we know that
+ // we want 3 to become 4 and for 4 to stay 4.
+
+ if( value < 0 )
+ {
+ if( retval & 1 )
+ {
+ retval -= 1;
+ }
+ }
+ else
+ {
+ if( retval & 1 )
+ {
+ retval += 1;
+ }
+ }
+ break;
+ }
+
+ case prohibited_e:
+ {
+ if( fpart != 0 )
+ {
+ *compute_error |= compute_error_truncate;
+ }
+
+ break;
+ }
+
+ default:
+ abort();
+ break;
+ }
+ return retval;
+ }
+
+static void
+int128_to_field(cblc_field_t *var,
+ unsigned char *location,
+ size_t length,
+ __int128 value,
+ int source_rdigits,
+ enum cbl_round_t rounded,
+ int *compute_error)
+ {
+ // This routine takes a numerical value, and scales and converts it to the
+ // target field type.
+
+ // It operates in the source codeset space, and converts the final result
+ // to the native codeset space
+
+ switch( var->type )
{
case FldFloat:
{
{
case FldGroup:
case FldAlphanumeric:
+ {
// This is sort of a Hail Mary play. We aren't supposed to do this
// conversion if rdigits is non-zero. But we shouldn't have gotten
- // here if rdigits is non-zero. So, we'll just go with the flow.
+ // here if rdigits is non-zero. But we're here, so we'll do the
+ // best we can in case somebody came up with a dialect that allows
+ // the attempt.
// Note that sending a signed value to an alphanumeric strips off
// any plus or minus signs.
memset(location, 0, length);
+ const charmap_t *charmap = __gg__get_charmap(var->encoding);
size_error = __gg__binary_to_string_encoded(
PTRCAST(char, location),
length > MAX_FIXED_POINT_DIGITS
? MAX_FIXED_POINT_DIGITS
- : length,
+ : length/charmap->stride(),
value,
var->encoding);
break;
+ }
case FldNumericDisplay:
if( var->attr & signable_e )
{
charmap_t *charmap = __gg__get_charmap(var->encoding);
+ int stride = charmap->stride();
// Things get exciting when a numeric-display value is signable
if( var->attr & separate_e )
{
// Whether positive or negative, a sign there will be:
- char sign_ch = is_negative ?
+ cbl_char_t sign_ch = is_negative ?
charmap->mapped_character(ascii_minus)
: charmap->mapped_character(ascii_plus) ;
if( var->attr & leading_e )
{
// The sign character goes into the first location
size_error =
- __gg__binary_to_string_encoded(PTRCAST(char, location+1),
- length-1,
- value,
- var->encoding);
- location[0] = sign_ch;
+ __gg__binary_to_string_encoded(
+ PTRCAST(char, location+stride),
+ var->digits,
+ value,
+ var->encoding);
+ charmap->putch(sign_ch, location, (size_t)0);
}
else
{
// The sign character goes into the last location
size_error =
__gg__binary_to_string_encoded(PTRCAST(char, location),
- length-1,
+ var->digits,
value,
var->encoding);
- location[length-1] = sign_ch;
+ charmap->putch(sign_ch, location, length-stride);
}
}
else
// First, convert the binary value to the correct-length string
size_error =
__gg__binary_to_string_encoded(PTRCAST(char, location),
- length,
+ var->digits,
value,
var->encoding);
// If all of the digits are zero, then the result is zero, and
// we have to kill the is_negative flag:
is_negative = false;
- for(size_t i=0; i<length; i++)
+ size_t index = 0;
+ while(index<length)
{
- if( location[i] != charmap->mapped_character(ascii_0) )
+ if( charmap->getch(location, &index)
+ != charmap->mapped_character(ascii_0) )
{
is_negative = true;
break;
}
unsigned char *sign_location =
- var->attr & leading_e ? location : location + length - 1;
-
- *sign_location = charmap->set_digit_negative(*sign_location,
- is_negative);
+ var->attr & leading_e ? location
+ : location + length - stride;
+ cbl_char_t sign_digit = charmap->getch(sign_location,
+ (size_t)0);
+ sign_digit = charmap->set_digit_negative(sign_digit,
+ is_negative);
+ charmap->putch(sign_digit, sign_location, (size_t)0);
}
}
else
// It's a simple positive number
size_error = __gg__binary_to_string_encoded(
PTRCAST(char, location),
- length,
+ var->digits,
value,
var->encoding);
}
charmap_t *charmap = __gg__get_charmap(var->encoding);
if( value == 0 && (var->attr & blank_zero_e) )
{
- memset(location, charmap->mapped_character(ascii_space), length);
+ charmap->memset(location, charmap->mapped_character(ascii_space), length);
}
else
{
DEFAULT_SOURCE_ENCODING,
var->encoding,
PTRCAST(char, location),
- var->capacity,
+ var->capacity/charmap->stride(),
&outlength);
memcpy(location, converted, outlength);
}
result might have a bad high-place digit for a value with an
odd number of places. */
- __int128 mask = __gg__power_of_ten(digits);
- size_error = !!(value / mask);
- value %= mask;
-
- // We are now set up to do the conversion:
- __gg__binary_to_packed(location, digits, value);
-
- // We can put the sign nybble into place at this point. Note that
- // for COMP-6 numbers the sign_nybble value is zero, so the next
- // operation is harmless.
- location[length -1] |= sign_nybble;
-
- // And we're done.
- break;
- }
-
- default:
- fprintf(stderr, "can't convert in %s() %s %d\n",
- __func__,
- var->name,
- var->type);
- abort();
- break;
- }
- if( compute_error )
- {
- *compute_error |= size_error ? compute_error_truncate : 0;
- }
- }
- }
- break;
- }
- }
-
-static __int128
-edited_to_binary( const cblc_field_t *field,
- char *ps_,
- int length,
- int *rdigits)
- {
- charmap_t *charmap = __gg__get_charmap(field->encoding);
-
- const unsigned char *ps = const_cast<const unsigned char *>(PTRCAST(unsigned char, ps_));
- // This routine is used for converting NumericEdited strings to
- // binary.
-
- // Numeric edited strings can have all kinds of crap in them: spaces,
- // slashes, dollar signs...you name it. It might have a minus sign at
- // the beginning or end, or it might have CR or DB at the end.
-
- // We are going to look for a minus sign, D (or d) and use that to flag the
- // result as negative. We are going to look for a decimal point and count up
- // the numerical digits to the right of it. And we are going to pretend
- // that nothing else matters.
-
- int hyphen = 0;
- *rdigits = 0;
-
- // index into the ps string
- int index = 0;
-
- // Create a delta_r for counting digits to the right of
- // any decimal point. If and when we encounter a decimal point,
- // we'll set this to one, otherwise it'll stay zero.
- int delta_r = 0;
-
- __int128 result = 0;
-
- // We need to check the last two characters. If CR or DB, then the result
- // is negative:
- if( length >= 2)
- {
- if( ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_D)
- || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_d))
- && ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_B)
- || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_b)) )
- {
- hyphen = 1;
- }
- else if( ((ps[length-2]&0xFF) == charmap->mapped_character(ascii_C)
- || (ps[length-2]&0xFF) == charmap->mapped_character(ascii_c))
- && ((ps[length-1]&0xFF) == charmap->mapped_character(ascii_R)
- || (ps[length-1]&0xFF) == charmap->mapped_character(ascii_r)) )
- {
- hyphen = 1;
- }
- }
-
- while( index < length )
- {
- unsigned char ch = ps[index++] & 0xFF;
- if( ch == charmap->mapped_character(__gg__decimal_point) )
- {
- delta_r = 1;
- continue;
- }
- if( ch == charmap->mapped_character(ascii_minus) )
- {
- hyphen = 1;
- continue;
- }
-
- if( charmap->mapped_character(ascii_0) <= ch
- && ch <= charmap->mapped_character(ascii_9) )
- {
- result *= 10;
- // In both EBCDIC and ASCII, this works:
- result += ch & 0x0F ;
- *rdigits += delta_r ;
- continue;
- }
- }
-
- if( hyphen )
- {
- result = -result;
- }
- return result;
- }
-
-static
-__int128
-big_endian_to_binary_signed(
- const unsigned char *psource,
- int capacity
-)
- {
- // This subroutine takes a big-endian value of "capacity" bytes and
- // converts it to a signed INT128. The highest order bit of the big-endian
- // value determines whether or not the highest-order bits of the INT128
- // return value are off or on.
-
- __int128 retval;
- if( *psource >= 128 )
- {
- retval = -1;
- }
- else
- {
- retval = 0;
- }
-
- // move the bytes of psource into retval, flipping them end-to-end
- unsigned char *dest = PTRCAST(unsigned char, &retval);
- while(capacity > 0)
- {
- *dest++ = psource[--capacity];
- }
- return retval;
- }
-
-static
-__int128
-little_endian_to_binary_signed(
- const unsigned char *psource,
- int capacity
-)
- {
- // This subroutine takes a little-endian value of "capacity" bytes and
- // converts it to a signed INT128. The highest order bit of the little-endian
- // value determines whether or not the highest-order bits of the INT128
- // return value are off or on.
-
- __int128 result;
-
- // Set all the bits of the result based on the sign of the source:
- if( psource[capacity-1] >= 128 )
- {
- result = -1;
- }
- else
- {
- result = 0;
- }
-
- // Copy the low-order bytes into place:
- memcpy(&result, psource, capacity);
- return result;
- }
-
-static
-__int128
-little_endian_to_binary_unsigned(
- const unsigned char *psource,
- int capacity
-)
- {
- __int128 result = 0;
-
- // Copy the low-order bytes into place:
- memcpy(&result, psource, capacity);
- return result;
- }
-
-static
-__int128
-big_endian_to_binary_unsigned(
- const unsigned char *psource,
- int capacity
-)
- {
- // This subroutine takes an unsigned big-endian value of "capacity" bytes and
- // converts it to an INT128.
-
- __int128 retval = 0 ;
-
- // move the bytes of psource into retval, flipping them end-to-end
- unsigned char *dest = PTRCAST(unsigned char, &retval);
- while(capacity > 0)
- {
- *dest++ = psource[--capacity];
- }
- return retval;
- }
-
-static
-__int128
-get_binary_value_local( int *rdigits,
- const cblc_field_t *resolved_var,
- unsigned char *resolved_location,
- size_t resolved_length)
- {
- __int128 retval = 0;
-
- switch( resolved_var->type )
- {
- case FldLiteralA :
- fprintf(stderr, "%s(): is trying to handle a FldLiteralA\n", __func__);
- abort();
- break;
-
- case FldGroup :
- case FldAlphanumeric :
- // Read the data area as a dirty string:
- retval = __gg__dirty_to_binary(
- PTRCAST(const char, resolved_location),
- resolved_var->encoding,
- resolved_length,
- rdigits );
- break;
-
- case FldNumericDisplay:
- {
- *rdigits = resolved_var->rdigits;
- if( resolved_location[resolved_length-1] == DEGENERATE_HIGH_VALUE )
- {
- // This is a degenerate case, which violates the language
- // specification, but nonetheless seems to be a thing. By
- // default, HIGH-VALUE is usually assumed to be 0xFF. This is
- // not necessarily true; HIGH-VALUE can be changed by the
- // SPECIAL-NAMES ALPHABET clause. Furthermore, by definition,
- // HIGH-VALUE applies *only* to text literals. However, there
- // seems to be code out in the universe that wants to be able
- // to compare NumericDisplay values that have been set to
- // HIGH-VALUE. Consider, for example, code that reads from
- // a disk file which sets the input field to HIGH-VALUE upon
- // an end-of-file condition.
-
- // This code detects that particular condition, and sets the
- // resulting binary number to the maximum possible positive
- // value.
-
- // Turn all the bits on
- memset( &retval, 0xFF, sizeof(retval) );
-
- // Make it positive by turning off the highest order bit:
- (PTRCAST(unsigned char, &retval))[sizeof(retval)-1] = 0x3F;
- }
- else
- {
- unsigned char *digits;
- unsigned char *sign_byte_location;
- int ndigits;
- if( resolved_var->attr & signable_e )
- {
- // Pick up the sign byte, and force our value to be positive
- if( (resolved_var->attr & separate_e )
- && (resolved_var->attr & leading_e ) )
- {
- // LEADING SEPARATE
- digits = resolved_location+1;
- sign_byte_location = resolved_location;
- ndigits = resolved_length - 1;
- }
- else if( (resolved_var->attr & separate_e)
- && !(resolved_var->attr & leading_e ) )
- {
- // TRAILING SEPARATE
- digits = resolved_location;
- sign_byte_location = resolved_location + resolved_length - 1;
- ndigits = resolved_length - 1;
- }
- else if( (resolved_var->attr & leading_e) )
- {
- // LEADING
- digits = resolved_location;
- sign_byte_location = resolved_location;
- ndigits = resolved_length;
- }
- else // if( !(resolved_var->attr & leading_e) )
- {
- // TRAILING
- digits = resolved_location;
- sign_byte_location = resolved_location + resolved_length - 1;
- ndigits = resolved_length;
- }
- }
- else
- {
- digits = resolved_location;
- sign_byte_location = resolved_location;
- ndigits = resolved_length;
- }
- retval = __gg__numeric_display_to_binary(sign_byte_location,
- digits,
- ndigits,
- resolved_var->encoding);
- }
- break;
- }
-
- case FldNumericEdited :
- retval = edited_to_binary(resolved_var,
- PTRCAST(char, resolved_location),
- resolved_length,
- rdigits);
- break;
-
- case FldNumericBinary :
- if( resolved_var->attr & signable_e)
- {
- retval = big_endian_to_binary_signed(
- PTRCAST(const unsigned char, resolved_location),
- resolved_length);
- }
- else
- {
- retval = big_endian_to_binary_unsigned(
- PTRCAST(const unsigned char, resolved_location),
- resolved_length);
- }
- *rdigits = resolved_var->rdigits;
- break;
-
- case FldLiteralN:
- {
- if( resolved_var->attr & signable_e)
- {
- retval = little_endian_to_binary_signed(resolved_var->data,
- resolved_var->capacity);
- }
- else
- {
- retval = little_endian_to_binary_unsigned(resolved_var->data,
- resolved_var->capacity);
- }
- *rdigits = resolved_var->rdigits;
- break;
- }
-
- case FldNumericBin5:
- case FldIndex:
- case FldPointer:
- if( resolved_var->attr & signable_e)
- {
- retval = little_endian_to_binary_signed(
- PTRCAST(const unsigned char, resolved_location),
- resolved_length);
- }
- else
- {
- retval = little_endian_to_binary_unsigned(
- PTRCAST(const unsigned char, resolved_location),
- resolved_length);
- }
- *rdigits = resolved_var->rdigits;
- break;
-
- case FldPacked:
- {
- *rdigits = resolved_var->rdigits;
- retval = __gg__packed_to_binary(resolved_location,
- resolved_length);
- break;
- }
- }
+ __int128 mask = __gg__power_of_ten(digits);
+ size_error = !!(value / mask);
+ value %= mask;
- if( resolved_var->attr & scaled_e )
- {
- // Here's where we handle a P-scaled number.
+ // We are now set up to do the conversion:
+ __gg__binary_to_packed(location, digits, value);
- if( resolved_var->rdigits >= 0)
- {
- // We might be dealing with a source with a PICTURE string of
- // PPPPPP999, which means retval is a three-digit number
- // and resolved_var->rdigits is +6. That means we need to divide retval
- // by 10**9, and we need to make rdigits 9
- *rdigits = resolved_var->digits + resolved_var->rdigits;
- }
- else
- {
- // We have a source with a PIC string like 999PPPPPP, which is
- // a capacity of 3 and a resolved_var->rdigits of -6. We need to multiply
- // retval by +6, and make rdigits zero:
- retval *= __gg__power_of_ten( -resolved_var->rdigits );
- *rdigits = 0;
+ // We can put the sign nybble into place at this point. Note that
+ // for COMP-6 numbers the sign_nybble value is zero, so the next
+ // operation is harmless.
+ location[length -1] |= sign_nybble;
+
+ // And we're done.
+ break;
+ }
+
+ default:
+ fprintf(stderr, "can't convert in %s() %s %d\n",
+ __func__,
+ var->name,
+ var->type);
+ abort();
+ break;
+ }
+ if( compute_error )
+ {
+ *compute_error |= size_error ? compute_error_truncate : 0;
+ }
+ }
}
+ break;
}
-
- return retval;
}
#pragma GCC diagnostic ignored "-Wformat-overflow"
}
extern "C"
-char *
-__gg__get_date_yymmdd(const cblc_field_t *field)
+void
+__gg__field_from_string(cblc_field_t *field,
+ size_t field_o,
+ size_t field_s,
+ const char *string,
+ size_t string_length)
+ {
+ // Warning: field_from_string uses charmap_t, so you can't safely feed it
+ // the results of __gg__iconverter without copying them.
+
+ // The string has to be in the field->encoding. It's legitimate for
+ // string_length to be less than field_s; we will right fill with spaces. And
+ // it can be greater than field_s, in which case __gg__move will truncate.
+
+ cblc_field_t source = {};
+ source.type = FldAlphanumeric;
+ source.encoding = field->encoding;
+ source.data = reinterpret_cast<unsigned char *>
+ (const_cast<char *>(string)),
+ source.capacity = string_length;
+ __gg__move( field, field_o, field_s,
+ &source, source.offset, source.capacity,
+ 0, truncation_e );
+ }
+
+static void
+field_from_ascii(cblc_field_t *field, char *psz)
+ {
+ cblc_field_t source = {};
+ source.type = FldAlphanumeric;
+ source.capacity = strlen(psz);
+ source.data = reinterpret_cast<unsigned char *>(psz);
+ source.encoding = __gg__console_encoding;
+ __gg__move( field, field->offset, field->capacity,
+ &source, source.offset, source.capacity,
+ 0, truncation_e );
+ }
+
+extern "C"
+void
+__gg__get_date_yymmdd(cblc_field_t *field)
{
char ach[32];
local->tm_year % 100,
local->tm_mon+1 % 100,
local->tm_mday % 100 );
- size_t charsout;
- const char *converted = __gg__iconverter(__gg__console_encoding,
- field->encoding,
- ach,
- strlen(ach),
- &charsout);
- return strdup(converted);
+ field_from_ascii(field, ach);
}
extern "C"
-char *
-__gg__get_date_yyyymmdd(const cblc_field_t *field)
+void
+__gg__get_date_yyyymmdd(cblc_field_t *field)
{
char ach[32];
-
time_t t = cobol_time();
const struct tm *local = localtime(&t);
-
sprintf(ach,
"%4.4d%2.2d%2.2d",
local->tm_year + 1900,
local->tm_mon+1,
local->tm_mday);
-
- size_t charsout;
- const char *converted = __gg__iconverter(__gg__console_encoding,
- field->encoding,
- ach,
- strlen(ach),
- &charsout);
- return strdup(converted);
+ field_from_ascii(field, ach);
}
extern "C"
-char *
-__gg__get_date_yyddd(const cblc_field_t *field)
+void
+__gg__get_date_yyddd(cblc_field_t *field)
{
char ach[32];
"%2.2d%3.3d",
local->tm_year % 100,
local->tm_yday+1);
-
- size_t charsout;
- const char *converted = __gg__iconverter(__gg__console_encoding,
- field->encoding,
- ach,
- strlen(ach),
- &charsout);
- return strdup(converted);
+ field_from_ascii(field, ach);
}
extern "C"
-char *
-__gg__get_yyyyddd(const cblc_field_t *field)
+void
+__gg__get_yyyyddd(cblc_field_t *field)
{
char ach[32];
"%4.4d%3.3d",
local->tm_year + 1900,
local->tm_yday+1);
-
- size_t charsout;
- const char *converted = __gg__iconverter(__gg__console_encoding,
- field->encoding,
- ach,
- strlen(ach),
- &charsout);
- return strdup(converted);
+ field_from_ascii(field, ach);
}
extern "C"
-char *
-__gg__get_date_dow(const cblc_field_t *field)
+void
+__gg__get_date_dow(cblc_field_t *field)
{
char ach[32];
sprintf(ach,
"%1.1d",
local->tm_wday == 0 ? 7 : local->tm_wday);
-
- size_t charsout;
- const char *converted = __gg__iconverter(__gg__console_encoding,
- field->encoding,
- ach,
- strlen(ach),
- &charsout);
- return strdup(converted);
+ field_from_ascii(field, ach);
}
static int
}
extern "C"
-char *
-__gg__get_date_hhmmssff(const cblc_field_t *field)
+void
+__gg__get_date_hhmmssff(cblc_field_t *field)
{
char ach[32];
-
struct cbl_timespec tv;
__gg__clock_gettime(&tv);
tm.tm_min,
tm.tm_sec,
hundredths);
+ field_from_ascii(field, ach);
+ }
- size_t charsout;
- const char *converted = __gg__iconverter(__gg__console_encoding,
- field->encoding,
- ach,
- strlen(ach),
- &charsout);
- return strdup(converted);
+static
+uint32_t collation_position( cbl_char_t ch )
+ {
+ uint32_t retval;
+ if( (ch & 0xFFFFFF00) == 0x00000000 )
+ {
+ // The character fits into the current DISPLAY collation
+ retval = collated(ch);
+ }
+ else
+ {
+ // It doesn't fit, so use the character value itself
+ retval = ch;
+ }
+ return retval;
+ }
+
+static cbl_char_t
+uber_compare(cbl_char_t ch_left, cbl_char_t ch_right)
+ {
+ if( ((ch_left | ch_right) & 0xFFFFFF00) == 0x00000000 )
+ {
+ // This is where collation is going to have to be fixed for multi-byte
+ // encodings. For now, if both characters fit into 0xFF, then we will
+ // use the current collation. Otherwise, we just compare them
+
+ // Both characters fit into the current DISPLAY codeset, so assume we
+ // are using the DISPLAY collation:
+ ch_left = collated(ch_left);
+ ch_right = collated(ch_right);
+ }
+ else
+ {
+ // Just compare the raw characters.
+ }
+ cbl_char_t retval = ch_left - ch_right;
+ return retval;
}
+
extern "C"
int
__gg__setop_compare(
- const char *candidate,
- int capacity,
- char *domain,
- cbl_encoding_t domain_encoding)
+ const cblc_field_t *candidate_field,
+ char *domain)
{
// This routine is called to compare the characters of 'candidate'
// against the list of character pairs in 'domain'
int h;
char *d;
- /* At the present writing, the domain was created in "source code" space,
- meaning that 'A' comes through as 0x41 no matter what the
- domain->encoding. Until Jim gets around to providing me with target
- values from the parser, we're doing the conversion here. */
+ /* The domain was created by converting characters to their UTF32
+ equivalents and then turning that information to hex. Numerical values,
+ which represent collation positions, are flagged as negative values.
+
+ In order to compare the apples in candidate to the UTF32 values in the
+ domain, we need to convert the candidate to UTF32 as well: */
- charmap_t *charmap = __gg__get_charmap(domain_encoding);
+ const charmap_t *charmap = __gg__get_charmap(DEFAULT_32_ENCODING);
- for(int i=0; i<capacity; i++)
+ size_t nbytes_converted;
+ const char *candidate = __gg__iconverter(candidate_field->encoding,
+ DEFAULT_32_ENCODING,
+ candidate_field->data,
+ candidate_field->capacity,
+ &nbytes_converted);
+ const char *candidate_end = candidate + nbytes_converted;
+ while(candidate < candidate_end)
{
- int ch = (*candidate++ & 0xFF);
+ cbl_char_t ch = charmap->getch(candidate, size_t(0));
+ candidate += charmap->stride();
+ int collation_pos = collation_position(ch);
d = domain;
while(*d)
{
retval = 0;
// We are decoding hexadecimal numbers, either in pairs,
// or singletons: "20/30 " or "20 ". The final one is
- // terminated with '\0'
+ // terminated with ' \0'
// See the comments in genapi.cc::get_class_condition_string
// to see how this string was encoded.
l = (int)strtoll(d, reinterpret_cast<char **>(&d), 16);
if( l < 0 )
{
+ // This is a collation position, as given in the COBOL program. Make
+ // it positive, and subtract 1 from it to make it the same space
+ // as the collation table:
l = -l;
+ l -= 1;
+ }
+ else
+ {
+
}
h = l;
if( *d == '/' )
h = (int)strtoll(d, reinterpret_cast<char **>(&d), 16);
if( h < 0 )
{
+ // This is a collation position; make it the same as
h = -h;
+ h -= 1;
}
}
else if( *d == ' ' )
d += 1;
}
- l = charmap->mapped_character(l);
- h = charmap->mapped_character(h);
- if( ch >= l && ch <= h )
+ if( collation_pos >= l && collation_pos <= h )
{
// This character is acceptable
retval = 1;
// MAX_FIXED_POINT_DIGITS
charmap_t *charmap = __gg__get_charmap(encoding);
- int mapped_minus = charmap->mapped_character(ascii_minus);
- int mapped_plus = charmap->mapped_character(ascii_plus);
- int mapped_decimal_point = charmap->mapped_character(__gg__decimal_point);
- int mapped_0 = charmap->mapped_character(ascii_0);
- int mapped_9 = charmap->mapped_character(ascii_9);
- int mapped_E = charmap->mapped_character(ascii_E);
- int mapped_e = charmap->mapped_character(ascii_e);
+ int stride = charmap->stride();
+
+ cbl_char_t mapped_minus = charmap->mapped_character(ascii_minus);
+ cbl_char_t mapped_plus = charmap->mapped_character(ascii_plus);
+ cbl_char_t mapped_decimal_point = charmap->mapped_character(__gg__decimal_point);
+ cbl_char_t mapped_0 = charmap->mapped_character(ascii_0);
+ cbl_char_t mapped_9 = charmap->mapped_character(ascii_9);
+ cbl_char_t mapped_E = charmap->mapped_character(ascii_E);
+ cbl_char_t mapped_e = charmap->mapped_character(ascii_e);
__int128 retval = 0;
int delta_r = 0;
// We now loop over the remaining input characters:
- unsigned char ch = '\0';
+ cbl_char_t ch = '\0';
+ size_t chindex = 0;
- if(length-- > 0)
+ if(length > 0)
{
- ch = *dirty++;
+ length -= stride;
+ ch = charmap->getch(dirty, &chindex);
if( ch == mapped_minus )
{
hyphen = 1;
}
}
- while( length-- > 0 )
+ while( length > 0 )
{
- ch = *dirty++;
+ length -= stride;
+ ch = charmap->getch(dirty, &chindex);
if( ch == mapped_decimal_point && delta_r == 0 )
{
// This is the first decimal point we've seen, so we
int exponent_sign = 1;
if( length > 0 )
{
- ch = *dirty;
+ ch = charmap->getch(dirty, chindex);
if( ch == mapped_plus)
{
- length -= 1;
- dirty += 1;
+ length -= stride;
+ dirty += stride;
}
else if( ch == mapped_minus )
{
exponent_sign = -1;
- length -= 1;
- dirty += 1;
+ length -= stride;
+ dirty += stride;
}
}
- while(length-- > 0)
+ while(length > 0)
{
- ch = *dirty++;
+ length -= stride;
+ ch = charmap->getch(dirty, &chindex);
if( ch < mapped_0
|| ch > mapped_9 )
{
int delta_r = 0;
// We now loop over the remaining input characters:
- unsigned char ch = '\0';
+ cbl_char_t ch = '\0';
charmap_t *charmap = __gg__get_charmap(field->encoding);
-
+ cbl_char_t mapped_minus = charmap->mapped_character(ascii_minus);
+ cbl_char_t mapped_plus = charmap->mapped_character(ascii_plus);
+ cbl_char_t mapped_decimal = charmap->mapped_character(__gg__decimal_point);
+ cbl_char_t mapped_0 = charmap->mapped_character(ascii_0);
+ cbl_char_t mapped_9 = charmap->mapped_character(ascii_9);
+ cbl_char_t mapped_E = charmap->mapped_character(ascii_E);
+ cbl_char_t mapped_e = charmap->mapped_character(ascii_e);
+
+ size_t index = 0;
if(length-- > 0)
{
- ch = *dirty++;
- if( ch == charmap->mapped_character(ascii_minus) )
+ ch = charmap->getch(dirty, &index);
+ if( ch == mapped_minus )
{
hyphen = 1;
}
- else if( ch == charmap->mapped_character(ascii_plus) )
+ else if( ch == mapped_plus )
{
// A plus sign is okay
}
- else if( ch == charmap->mapped_character(__gg__decimal_point) )
+ else if( ch == mapped_decimal )
{
delta_r = 1;
}
- else if( ch >= charmap->mapped_character(ascii_0)
- && ch <= charmap->mapped_character(ascii_9) )
+ else if( ch >= mapped_0
+ && ch <= mapped_9 )
{
- retval = ch - charmap->mapped_character(ascii_0) ;
+ retval = ch & 0x0F ;
}
else
{
while( length-- > 0 )
{
- ch = *dirty++;
- if( ch == charmap->mapped_character(__gg__decimal_point) && delta_r == 0 )
+ ch = charmap->getch(dirty, &index);
+ if( ch == mapped_decimal && delta_r == 0 )
{
// This is the first decimal point we've seen, so we
// can start counting rdigits:
delta_r = 1;
continue;
}
- if( ch < charmap->mapped_character(ascii_0)
- || ch > charmap->mapped_character(ascii_9) )
+ if( ch < mapped_0
+ || ch > mapped_9 )
{
// When we hit something that isn't a digit, then we are done
break;
}
retval *= 10;
- retval += ch - charmap->mapped_character(ascii_0) ;
+ retval += ch & 0x0F ;
rdigits += delta_r;
}
// Let's check for an exponent:
int exponent = 0;
- if( ch == charmap->mapped_character(ascii_E)
- || ch == charmap->mapped_character(ascii_e) )
+ if( ch == mapped_E
+ || ch == mapped_e )
{
int exponent_sign = 1;
if( length > 0 )
{
- ch = *dirty;
- if( ch == charmap->mapped_character(ascii_plus) )
+ ch = charmap->getch(dirty, &index);
+ if( ch == mapped_plus )
{
length -= 1;
dirty += 1;
}
- else if (ch == charmap->mapped_character(ascii_minus) )
+ else if(ch == mapped_minus )
{
exponent_sign = -1;
length -= 1;
}
while(length-- > 0)
{
- ch = *dirty++;
- if( ch < charmap->mapped_character(ascii_0)
- || ch > charmap->mapped_character(ascii_9) )
+ ch = charmap->getch(dirty, &index);
+ if( ch < mapped_0
+ || ch > mapped_9 )
{
// When we hit something that isn't a digit, then we are done
break;
}
exponent *= 10;
- exponent += ch - charmap->mapped_character(ascii_0) ;
+ exponent += ch & 0x0F ;
}
exponent *= exponent_sign;
}
// The routine returns the cbl_encoding_t of the result.
- cbl_encoding_t retval = var->encoding;
+ cbl_encoding_t enc_dest = var->encoding;
int source_rdigits = var->rdigits;
if( address_of )
{
// Assume that DISPLAY OF ADDRESS OF should be what's expected:
-
- __gg__realloc_if_necessary(dest, dest_size, 2*sizeof(void *) + 1);
+ const charmap_t *charmap = __gg__get_charmap(enc_dest);
+ __gg__realloc_if_necessary(dest,
+ dest_size,
+ 2*sizeof(void *) + charmap->stride());
sprintf( *dest,
"0x%*.*lx",
(int)(2*sizeof(void *)),
(int)(2*sizeof(void *)),
(unsigned long)actual_location);
- retval = __gg__console_encoding;
+ enc_dest = __gg__console_encoding;
goto done;
}
switch( var->type )
{
case FldLiteralA:
+ {
+ charmap_t *charmap = __gg__get_charmap(enc_dest);
+ __gg__realloc_if_necessary(dest,
+ dest_size,
+ actual_length+charmap->stride());
+
+ cbl_figconst_t figconst = (cbl_figconst_t)(var->attr & FIGCONST_MASK);
+ if( figconst )
+ {
+ charmap = __gg__get_charmap(enc_dest);
+ int figconst_char = charmap->figconst_character(figconst);
+ memset(*dest, figconst_char, actual_length);
+ (*dest)[actual_length] = NULLCH;
+ }
+ else
+ {
+ if( actual_location )
+ {
+ memcpy(*dest, actual_location, actual_length);
+ }
+ else
+ {
+ fprintf(stderr, "attempting to display a NULL pointer in %s\n", var->name);
+ abort();
+ }
+ (*dest)[actual_length] = NULLCH;
+ }
+ break;
+ }
+
case FldGroup:
case FldAlphanumeric:
case FldNumericEdited:
case FldAlphaEdited:
{
- __gg__realloc_if_necessary(dest, dest_size, actual_length+1);
+ charmap_t *charmap = __gg__get_charmap(enc_dest);
+ __gg__realloc_if_necessary(dest,
+ dest_size,
+ actual_length+charmap->stride());
cbl_figconst_t figconst = (cbl_figconst_t)(var->attr & FIGCONST_MASK);
if( figconst )
{
- charmap_t *charmap = __gg__get_charmap(retval);
+ charmap = __gg__get_charmap(enc_dest);
int figconst_char = charmap->figconst_character(figconst);
memset(*dest, figconst_char, actual_length);
(*dest)[actual_length] = NULLCH;
case FldNumericDisplay:
{
+ charmap_t *charmap = __gg__get_charmap(enc_dest);
if( var_is_refmod(var) )
{
// Because we are dealing with a refmod, we just output those
// characters.
- __gg__realloc_if_necessary(dest, dest_size, actual_length+1);
+ __gg__realloc_if_necessary(dest,
+ dest_size,
+ actual_length+charmap->stride());
memcpy((*dest), actual_location, actual_length);
(*dest)[actual_length] = NULLCH;
break;
// This buffer is larger than can validly be needed
unsigned char converted[128];
size_t outlength;
- retval = DEFAULT_SOURCE_ENCODING;
+ enc_dest = DEFAULT_SOURCE_ENCODING;
const char *mapped = __gg__iconverter(
var->encoding,
- retval,
+ enc_dest,
PTRCAST(char, actual_location),
actual_length,
&outlength);
memcpy(converted, mapped, outlength);
- charmap_t *charmap = __gg__get_charmap(retval);
+ charmap = __gg__get_charmap(enc_dest);
// converted[] is now an ASCII version of the value in memory. We are
// going to "validate" the characters, which might be garbage.
bool is_negative;
int index = 0; // This is the running index into our output destination
- std::ptrdiff_t signoffset;
switch(signtype)
{
case 0:
// not signable
signloc = converted;
digits = converted;
- digits_e = converted + actual_length;
+ digits_e = converted + outlength;
is_negative = false;
break;
case 4:
+ {
// internal trailing
- signloc = converted + actual_length-1;
+ const charmap_t *charmap_from = __gg__get_charmap(var->encoding);
+ cbl_char_t original_sign_digit =
+ charmap_from->getch(actual_location,
+ actual_length - charmap_from->stride());
+ signloc = converted + outlength-1;
digits = converted;
- digits_e = converted + actual_length;
+ digits_e = converted + outlength;
/* In ascii, negative is indicated by turning bit 0x40 on.
In ebcdic, by turning bit 0x20 off. In both cases, the result
is outside of the range '0' through '9'. Working this way is
variable's memory. I am not overly concerned.
*/
is_negative = *signloc > ascii_9 || *signloc < ascii_0;
- signoffset = signloc-converted;
- *signloc = charmap->mapped_character(ascii_0)
- + (actual_location[signoffset] & 0x0F);
+ *signloc = ascii_0 + (original_sign_digit & 0x0F);
break;
+ }
case 5:
+ {
// internal leading
+ const charmap_t *charmap_from = __gg__get_charmap(var->encoding);
+ cbl_char_t original_sign_digit =
+ charmap_from->getch(actual_location,
+ (size_t)0);
signloc = converted;
digits = converted;
- digits_e = converted + actual_length;
+ digits_e = converted + outlength;
is_negative = *signloc > ascii_9 || *signloc < ascii_0;
- signoffset = signloc-converted;
- *signloc = charmap->mapped_character(ascii_0)
- + (actual_location[signoffset] & 0x0F);
+ *signloc = ascii_0 + (original_sign_digit & 0x0F);
break;
+ }
case 6:
// separate trailing
- signloc = converted + actual_length-1;
+ signloc = converted + outlength-1;
digits = converted;
- digits_e = converted + actual_length-1;
+ digits_e = converted + outlength-1;
is_negative = *signloc == ascii_minus;
break;
case 7:
// separate leading
signloc = converted;
digits = converted+1;
- digits_e = converted + actual_length;
+ digits_e = converted + outlength;
is_negative = *signloc == ascii_minus;
break;
}
break;
case 6:
// separate trailing
+ // We'll stick on the trailing sign character later
break;
case 7:
// separate leading
}
char ach[128];
- retval = DEFAULT_SOURCE_ENCODING;
- charmap_t *charmap = __gg__get_charmap(retval);
+ enc_dest = DEFAULT_SOURCE_ENCODING;
+ charmap_t *charmap = __gg__get_charmap(enc_dest);
__gg__binary_to_string_ascii(ach, digits, value);
sprintf(ach, "%lu", (unsigned long)value);
__gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1);
strcpy(*dest, ach);
- retval = __gg__console_encoding;
+ enc_dest = __gg__console_encoding;
}
break;
memset(*dest, 0, retsize);
strcpy(*dest, "<LEVEL88>");
}
- retval = __gg__console_encoding;
+ enc_dest = __gg__console_encoding;
break;
}
(int)(2*sizeof(void *)),
(int)(2*sizeof(void *)),
(unsigned long)value);
- retval = __gg__console_encoding;
+ enc_dest = __gg__console_encoding;
break;
}
break;
}
}
- retval = __gg__console_encoding;
+ enc_dest = __gg__console_encoding;
break;
}
if( (var->attr & scaled_e) && var->type != FldNumericDisplay )
{
- charmap_t *charmap = __gg__get_charmap(retval);
+ charmap_t *charmap = __gg__get_charmap(enc_dest);
static size_t buffer_size = MINIMUM_ALLOCATION_SIZE;
static char *buffer = static_cast<char *>(malloc(buffer_size));
}
done:
- if( retval == custom_encoding_e )
+ if( enc_dest == custom_encoding_e )
{
fprintf(stderr, "Bum encoding in format_for_display_internal\n");
abort();
}
- return retval;
+ return enc_dest;
}
static int
compare_88( const char *list,
const char *list_e,
bool fig_const,
- const cblc_field_t *conditional,
- unsigned char *conditional_location,
- int conditional_length)
+ const cblc_field_t *conditional_,
+ const unsigned char *conditional_location_,
+ int conditional_length_)
{
- charmap_t *charmap = __gg__get_charmap(conditional->encoding);
- int list_len = (int)(list_e-list);
+ int cmpval;
+
+ // We know that list through list_e are characters in UTF32 encoding.
+ size_t list_len = list_e-list;
+
+ // We need to convert the conditional to be UTF32 as well:
+ charmap_t *charmap = __gg__get_charmap(DEFAULT_32_ENCODING);
+ size_t stride = charmap->stride();
+ cbl_char_t mapped_space = charmap->mapped_character(ascii_space);
+
+ // First, convert the conditional to UTF32
+ size_t conditional_length=0;
+ char * conditional_i = __gg__miconverter(
+ conditional_->encoding,
+ DEFAULT_32_ENCODING,
+ conditional_location_,
+ conditional_length_,
+ &conditional_length);
+ const char *conditional = conditional_i;
+
+ // Now we want to trim away trailing spaces from the conditional, leaving
+ // just one so that we don't get down to an empty string.
+ while( conditional_length > stride)
+ {
+ cbl_char_t ch = charmap->getch(conditional, conditional_length - stride);
+ if( ch != mapped_space )
+ {
+ break;
+ }
+ conditional_length -= stride;
+ }
+
+ // We have conditional_length bytes at conditional. Create a test area that
+ // we will compare against conditional:
+
int test_len;
char *test;
if( fig_const )
{
- // We are working with a figurative constant
+ // The 'list' is a figurative constant, so we need to create a test
+ // buffer that is all the character designated by the figurative constant.
test = static_cast<char *>(malloc(conditional_length));
massert(test);
test_len = conditional_length;
+
// This is where we handle the zero-length strings that
// nonetheless can magically be expanded into figurative
// constants:
- int ch = charmap->mapped_character(ascii_space);
+ // We default to space, since we know that the figurative constant is
+ // S, Z, H, Q, or L
+ cbl_char_t ch = charmap->mapped_character(ascii_space);
// Check for the strings starting with 0xFF whose second character
// indicates a figurative constant:
- if( list[0] == ascii_Z )
+ cbl_char_t char_0 = charmap->getch(list, (size_t)0);
+ if( char_0 == charmap->mapped_character(ascii_Z) )
{
ch = charmap->mapped_character(ascii_0);
}
- else if( list[0] == ascii_H )
+ else if( char_0 == charmap->mapped_character(ascii_H) )
{
ch = charmap->high_value_character();
}
- else if( list[0] == ascii_Q )
+ else if( char_0 == charmap->mapped_character(ascii_Q) )
{
ch = charmap->quote_character();
}
- else if( list[0] == ascii_L )
+ else if( char_0 == charmap->mapped_character(ascii_L) )
{
ch = charmap->low_value_character();
}
- memset( test, ch, conditional_length );
+ // The test location is full of the figurative constant
+ charmap->memset( test, ch, conditional_length );
}
else if( list_len < conditional_length )
{
- // 'list' is too short; we have to right-fill with spaces:
+ // 'list' element is too short; we have to right-fill with spaces:
test = static_cast<char *>(malloc(conditional_length));
massert(test);
test_len = conditional_length;
- memset(test, charmap->mapped_character(ascii_space), conditional_length);
+ // Copy over the shorty string from 'list'
memcpy(test, list, list_len);
+ // Right fill with spaces:
+ charmap->memset(test+list_len,
+ charmap->mapped_character(ascii_space),
+ conditional_length-list_len);
}
else
{
+ // list_len is >= conditional length. Presumably the parser ensured that
+ // the list element couldn't be bigger than the maximum condition length,
+ // we we'll truncate at list_len:
test = static_cast<char *>(malloc(list_len));
massert(test);
test_len = list_len;
memcpy(test, list, list_len);
}
- int cmpval;
-
- if( test[0] == NULLCH && conditional_location[0] == 0)
- {
- cmpval = 0;
- }
- else
- {
- cmpval = cstrncmp (test,
- PTRCAST(char, conditional_location),
- conditional_length);
+ // At this point we have conditional and test, and they both have at least
+ // test_len bytes.
-// if( cmpval == 0 && (int)strlen(test) != conditional_length )
- if( cmpval == 0 && test_len != conditional_length )
- {
- // When strncmp returns 0, the actual smaller string is the
- // the shorter of the two:
- cmpval = test_len - conditional_length;
- }
- }
+ cmpval = memcmp(test, conditional, test_len);
free(test);
+ free(conditional_i);
- if( cmpval < 0 )
- {
- cmpval = -1;
- }
- else if(cmpval > 0)
- {
- cmpval = +1;
- }
return cmpval;
}
}
else if( field->type == FldLiteralN )
{
- if( __gg__decimal_point == '.' )
+ union
{
- size_t charsout;
- char *converted = __gg__iconverter(field->encoding,
- DEFAULT_SOURCE_ENCODING,
- field->initial,
- strlen(field->initial),
- &charsout);
- retval = strtofp128(converted, NULL);
- }
- else
+ __int128 i128;
+ uint64_t u64;
+ uint32_t u32;
+ uint16_t u16;
+ uint8_t u8 ;
+ int64_t i64;
+ int32_t i32;
+ int16_t i16;
+ int8_t i8 ;
+ };
+ i128 = 0;
+ memcpy(&i128, field->data, field->capacity);
+
+ if( field->attr & signable_e )
{
- // We need to replace any commas with periods
- static size_t size = 128;
- static char *buffer = static_cast<char *>(malloc(size));
- while( strlen(field->initial)+1 > size )
+ switch(field->capacity)
{
- size *= 2;
- buffer = static_cast<char *>(malloc(size));
+ case 16:
+ retval = i128;
+ break;
+ case 8:
+ retval = i64;
+ break;
+ case 4:
+ retval = i32;
+ break;
+ case 2:
+ retval = i16;
+ break;
+ case 1:
+ retval = i8;
+ break;
}
- massert(buffer);
- strcpy(buffer, field->initial);
- char *p = strchr(buffer, ',');
- if(p)
+ }
+ else
+ {
+ switch(field->capacity)
{
- *p = '.';
+ case 16:
+ retval = i128;
+ break;
+ case 8:
+ retval = u64;
+ break;
+ case 4:
+ retval = u32;
+ break;
+ case 2:
+ retval = u16;
+ break;
+ case 1:
+ retval = u8;
+ break;
}
- retval = strtofp128(buffer, NULL);
}
- }
- else
- {
- fprintf(stderr, "What's all this then?\n");
- abort();
+ if( field->rdigits )
+ {
+ retval /= __gg__power_of_ten( field->rdigits );
+ }
}
return retval;
}
__int128 value;
int rdigits;
- // list->initial points to a superstring: a double-null terminated
- // string containing pairs of strings. We are looking for equality.
+ charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+ int stride32 = charmap32->stride();
+ cbl_char_t mapped_F = charmap32->mapped_character(ascii_F);
+ cbl_char_t mapped_Z = charmap32->mapped_character(ascii_Z);
+
+ // We are disassembling strings that have the form <length><flag><value>
switch( conditional->type )
{
conditional,
conditional_location,
conditional_length);
- const char *walker = list->initial;
+ char *walker = list->initial;
while(*walker)
{
- char left_flag;
+ cbl_char_t left_flag;
size_t left_len;
- char * left;
+ char *left;
- char right_flag;
+ cbl_char_t right_flag;
size_t right_len;
- char * right;
+ char *right;
char *pend;
- left_len = strtoull(walker, &pend, 10);
- left_flag = *pend;
- left = pend+1;
- right = left + left_len;
- right_len = strtoull(right, &pend, 10);
- right_flag = *pend;
- right = pend+1;
+ left_len = charmap32->strtoull(walker, &pend, 10);
+ left_flag = charmap32->getch(pend, (size_t)0);
+ left = pend+stride32;
+
+ right = left + left_len*stride32;
+ right_len = charmap32->strtoull(right, &pend, 10);
+ right_flag = charmap32->getch(pend, (size_t)0);
+ right = pend+stride32;
- walker = right + right_len;
+ walker = right + right_len*stride32;
int left_rdigits;
int right_rdigits;
__int128 left_value;
- if( left_flag == 'F' && left[0] == 'Z' )
+ cbl_char_t left_0 = charmap32->getch(left, size_t(0));
+ if( left_flag == mapped_F && left_0 == mapped_Z )
{
left_value = 0;
left_rdigits = 0;
{
left_value = __gg__dirty_to_binary(
left,
- conditional->encoding,
- left_len,
+ DEFAULT_32_ENCODING,
+ left_len*stride32,
&left_rdigits);
}
__int128 right_value;
- if( right_flag == 'F' && right[0] == 'Z' )
+ cbl_char_t right_0 = charmap32->getch(right, size_t(0));
+ if( right_flag == ascii_F && right_0 == mapped_Z )
{
right_value = 0;
right_rdigits = 0;
{
right_value = __gg__dirty_to_binary(
right,
- conditional->encoding,
- right_len,
+ DEFAULT_32_ENCODING,
+ right_len*stride32,
&right_rdigits);
}
case FldAlphanumeric:
case FldLiteralA:
{
+ // This is an alphanumeric comparison. The list is in UTF32, so we
+ // are going to have to convert the conditional to UTF32.
char *walker = list->initial;
while(*walker)
{
char *pend;
+ cbl_char_t ch;
+
first = walker;
- first_len = strtoull(first, &pend, 10);
- fig1 = *pend == 'F';
- first = pend+1;
- first_e = first + first_len;
+ first_len = charmap32->strtoull(first, &pend, 10);
+ ch = charmap32->getch(pend, (size_t)0);
+ fig1 = ch == mapped_F;
+ first = pend+stride32;
+ first_e = first + first_len*stride32;
last = first_e;
- last_len = strtoull(last, &pend, 10);
- fig2 = *pend == 'F';
- last = pend+1;
- last_e = last + last_len;
+ last_len = charmap32->strtoull(last, &pend, 10);
+ ch = charmap32->getch(pend, (size_t)0);
+ fig2 = ch == mapped_F;
+ last = pend+stride32;
+ last_e = last + last_len*stride32;
walker = last_e;
case FldFloat:
{
+ // We need a fake field to hold the encoding for the
+ // __gg__dirty_to_float() routine.
+ cblc_field_t fakir;
+ fakir.encoding = DEFAULT_32_ENCODING;
+
GCOB_FP128 fp128 = get_float128(conditional, conditional_location) ;
- const char *walker = list->initial;
+ char *walker = list->initial;
while(*walker)
{
- char left_flag;
+ cbl_char_t left_flag;
size_t left_len;
char * left;
- char right_flag;
+ cbl_char_t right_flag;
size_t right_len;
char * right;
char *pend;
- left_len = strtoull(walker, &pend, 10);
- left_flag = *pend;
- left = pend+1;
+ left_len = charmap32->strtoull(walker, &pend, 10);
+ left_flag = charmap32->getch(pend, (size_t)0);
+ left = pend+stride32;
- right = left + left_len;
- right_len = strtoull(right, &pend, 10);
- right_flag = *pend;
- right = pend+1;
+ right = left + left_len*stride32;
+ right_len = charmap32->strtoull(right, &pend, 10);
+ right_flag = charmap32->getch(pend, (size_t)0);
+ right = pend+stride32;
- walker = right + right_len;
+ walker = right + right_len*stride32;
GCOB_FP128 left_value;
- if( left_flag == ascii_F && left[0] == ascii_Z )
+ if( left_flag == mapped_F
+ && charmap32->getch(left, (size_t)0) == mapped_Z )
{
left_value = 0;
}
{
left_value = __gg__dirty_to_float(left,
left_len,
- conditional);
+ &fakir);
}
GCOB_FP128 right_value;
- if( right_flag == 'F' && right[0] == 'Z' )
+ if( right_flag == mapped_F
+ && charmap32->getch(right, (size_t)0) == mapped_Z )
{
right_value = 0;
}
{
right_value = __gg__dirty_to_float( right,
right_len,
- conditional);
+ &fakir);
}
if( left_value <= fp128 && fp128 <= right_value )
return retval;
}
+static void
+interconvert( char **allocated_left,
+ char **allocated_right,
+ char **left_string,
+ char **right_string,
+ size_t *left_length,
+ size_t *right_length,
+ cbl_encoding_t *encoding_left,
+ cbl_encoding_t *encoding_right)
+ {
+ // This routine looks at two encodings and decides what do to about comparing
+ // apples to apples.
+ *allocated_left = nullptr;
+ *allocated_right = nullptr;
+
+ bool convert_left_to_right = false;
+ bool convert_right_to_left = false;
+
+ size_t converted_length;
+ const char *converted;
+ if( *encoding_left == *encoding_right )
+ {
+ // This is both the most-seen situation, and, happily, the easiest to
+ // handle. We just do nothing.
+ }
+ else if( *encoding_left == __gg__national_encoding
+ || *encoding_right == __gg__national_encoding )
+ {
+ // The encodings are different, but at least one is the national encoding.
+ // Convert the other one to be national as well:
+ if( *encoding_left != __gg__national_encoding )
+ {
+ convert_left_to_right = true;
+ }
+ else
+ {
+ convert_right_to_left = true;
+ }
+ }
+ else
+ {
+ // We have two different encodings, and neither of them are national. This
+ // can happen when a file descriptor has a specific codeset that doesn't
+ // match the national codeset. We will convert the narrower to the wider;
+ // if they are both the same width we will pick one arbitrarily.
+ const charmap_t *charmap_left = __gg__get_charmap(*encoding_left);
+ const charmap_t *charmap_right = __gg__get_charmap(*encoding_right);
+ if( charmap_right->stride() >= charmap_left->stride() )
+ {
+ convert_left_to_right = true;
+ }
+ else
+ {
+ convert_right_to_left = true;
+ }
+ }
+
+ if( convert_left_to_right )
+ {
+ // Convert the left side to the right encoding
+ converted = __gg__iconverter(*encoding_left,
+ *encoding_right,
+ *left_string,
+ *left_length,
+ &converted_length);
+ *encoding_left = *encoding_right ;
+ *allocated_left = static_cast<char *>(malloc(converted_length));
+ massert(*allocated_left);
+ *left_string = *allocated_left;
+ *left_length = converted_length;
+ memcpy(*left_string, converted, *left_length);
+ }
+ if( convert_right_to_left )
+ {
+ // Convert the right side to the left_encoding
+ converted = __gg__iconverter(*encoding_right,
+ *encoding_left,
+ *right_string,
+ *right_length,
+ &converted_length);
+ *encoding_right = *encoding_left ;
+ *allocated_right = static_cast<char *>(malloc(converted_length));
+ massert(*allocated_right);
+ *right_string = *allocated_right;
+ *right_length = converted_length;
+ memcpy(right_string, converted, *right_length);
+ }
+ }
+
static
int
-compare_strings(const char *left_string,
- size_t left_length,
- bool left_all,
- const char *right_string,
- size_t right_length,
- bool right_all,
- cbl_encoding_t encoding)
- {
+compare_strings(char *left_string,
+ size_t left_length,
+ bool left_all,
+ char *right_string,
+ size_t right_length,
+ bool right_all,
+ cbl_encoding_t encoding_left,
+ cbl_encoding_t encoding_right)
+ {
+ // This routine compares two strings. It sounds innocent enough, right? But
+ // we have to deal with different encodings. It's not clear what the rules
+ // are, or should be, and collation is just a mess. We are going to be
+ // playing Whac-A-Mole with this routine, possibly until the end of time.
+
+ char *allocated_left = nullptr;
+ char *allocated_right = nullptr;
+
+ interconvert(&allocated_left,
+ &allocated_right,
+ &left_string,
+ &right_string,
+ &left_length,
+ &right_length,
+ &encoding_left,
+ &encoding_right);
+
+ charmap_t *charmap_left = __gg__get_charmap(encoding_left);
+ charmap_t *charmap_right = __gg__get_charmap(encoding_right);
+
int retval = 0;
- size_t i = 0;
+ size_t index_left = 0;
+ size_t index_right = 0;
if( right_all && right_length > left_length )
{
- // In the rubber-bandy ALL situation, and the ALL is longer than the
- // fixed side, we just compare the characters of the fixed side:
+ // If the right side is ALL, and is longer than the left side, we just
+ // compare the matching characters.
right_length = left_length;
}
if( left_all && left_length > right_length )
{
+ // If the left side is ALL, and is longer than the right side, we just
+ // compare the matching characters.
left_length = right_length;
}
- while( !retval && i<left_length && i<right_length )
+ while( !retval && index_left<left_length && index_right<right_length )
{
- unsigned int chl = collated((unsigned char)left_string[i]);
- unsigned int chr = collated((unsigned char)right_string[i]);
- retval = chl - chr;
- i += 1;
+ cbl_char_t ch_left = charmap_left->getch(left_string, &index_left);
+ cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+ retval = uber_compare(ch_left, ch_right);
}
// We need to space-extend the shorter value. That's because
// "Bob" is equal to "Bob "
if( !right_all )
{
- charmap_t *charmap = __gg__get_charmap(encoding);
- while( !retval && i<left_length )
+ while( !retval && index_left<left_length )
{
- retval = collated((unsigned char)left_string[i])
- - collated(charmap->mapped_character(ascii_space));
- i += 1;
+ cbl_char_t ch_left = charmap_left->getch(left_string, &index_left);
+ cbl_char_t ch_right = charmap_right->mapped_character(ascii_space);
+ retval = uber_compare(ch_left, ch_right);
}
}
else
{
// In an ALL situation where the ALL is shorter than the fixed side, we
// wrap around the ALL characters
- while( !retval && i<left_length )
+ while( !retval && index_left<left_length )
{
- retval = collated((unsigned char)left_string[i])
- - collated((unsigned char)right_string[i%right_length]);
- i += 1;
+ index_right %= right_length;
+ cbl_char_t ch_left = charmap_left->getch(left_string, &index_left);
+ cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+ retval = uber_compare(ch_left, ch_right);
}
}
if( !left_all )
{
- charmap_t *charmap = __gg__get_charmap(encoding);
- while( !retval && i<right_length )
+ while( !retval && index_right<right_length )
{
- retval = collated(charmap->mapped_character(ascii_space))
- - collated((unsigned char)right_string[i]);
- i += 1;
+ cbl_char_t ch_left = charmap_left->mapped_character(ascii_space);
+ cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+ retval = uber_compare(ch_left, ch_right);
}
}
else
{
- if( left_length > right_length )
- {
- left_length = right_length;
- }
- while( !retval && i<right_length )
+ while( !retval && index_right<right_length )
{
- retval = collated((unsigned char)left_string[i%left_length])
- - collated((unsigned char)right_string[i]);
- i += 1;
+ index_left %= left_length;
+ cbl_char_t ch_left = charmap_left->mapped_character(ascii_space);
+ cbl_char_t ch_right = charmap_right->getch(right_string, &index_right);
+ retval = uber_compare(ch_left, ch_right);
}
}
+
+ free(allocated_right);
+ free(allocated_left);
return retval;
}
extern "C"
int
-__gg__compare_2(cblc_field_t *left_side,
- unsigned char *left_location,
- size_t left_length,
- int left_attr,
- int left_flags,
- cblc_field_t *right_side,
- unsigned char *right_location,
- size_t right_length,
- int right_attr,
- int right_flags,
- int second_time_through)
+__gg__compare_2(cblc_field_t *left_side,
+ unsigned char *left_location,
+ size_t left_length,
+ uint64_t left_attr,
+ int left_flags,
+ cblc_field_t *right_side,
+ unsigned char *right_location,
+ size_t right_length,
+ uint64_t right_attr,
+ int right_flags,
+ int second_time_through)
{
// First order of business: If right_side is a FldClass, pass that off
// to the speciality squad:
cbl_encoding_t encoding_right = right_side->encoding;
charmap_t *charmap_left = __gg__get_charmap(encoding_left);
charmap_t *charmap_right = __gg__get_charmap(encoding_right);
+ int stride = charmap_left->stride();
// Figure out if we have any figurative constants
cbl_figconst_t left_figconst = (cbl_figconst_t)(left_attr & FIGCONST_MASK);
cbl_figconst_t right_figconst = (cbl_figconst_t)(right_attr & FIGCONST_MASK);
- unsigned int fig_left = 0;
- unsigned int fig_right = 0;
+ cbl_char_t fig_left = 0;
+ cbl_char_t fig_right = 0;
- fig_left = charmap_left->figconst_character(left_figconst);
- fig_right = charmap_right->figconst_character(right_figconst);
+ if( left_figconst )
+ {
+ fig_left = charmap_left->figconst_character(left_figconst);
+ }
+ if( right_figconst )
+ {
+ fig_right = charmap_right->figconst_character(right_figconst);
+ }
// We have four high-level conditions to consider depending on whether
// left and/or right are figurative constants:
if( left_figconst && right_figconst )
{
// We are comparing two figurative constants
- retval = collated(fig_left) - collated(fig_right);
+ retval = uber_compare(fig_left, fig_right);
compare = true;
goto fixup_retval;
}
case quote_value_e:
case space_value_e:
retval = 0;
- for(size_t i=0; i<left_length; i++)
+ for(size_t i=0; i<left_length; i+=stride)
{
- // The right side is a figurative constant. Compare the left side
- // to the appropriate constant.
- unsigned int fig_of_left =
+ // The right side is a figurative constant. Compare data from the
+ // left side to the figurative constant from the right converted to
+ // the left encoding:
+ cbl_char_t fig_of_right =
charmap_left->figconst_character(right_figconst);
- retval = collated((unsigned int)left_location[i])
- - collated(fig_of_left);
+ cbl_char_t left_ch = charmap_left->getch(left_location, i);
+ retval = uber_compare(left_ch, fig_of_right);
if( retval )
{
break;
default:
// We are comparing a alphanumeric string to ZEROES
retval = 0;
- for(size_t i=0; i<left_length; i++)
+ for(size_t i=0; i<left_length; i+=stride)
{
- unsigned int fig_of_left =
+ unsigned int fig_of_right =
charmap_left->figconst_character(right_figconst);
- retval = collated((unsigned int)left_location[i])
- - collated(fig_of_left);
+ cbl_char_t ch_left = charmap_left->getch(left_location, i);
+ retval = uber_compare(ch_left, fig_of_right);
if( retval )
{
break;
if( local_is_alpha(left_side->type, left_address_of)
&& local_is_alpha(right_side->type, right_address_of) )
{
- if( encoding_left != encoding_right )
+ if( (left_side->attr | right_side->attr) & hex_encoded_e )
{
- fprintf(stderr, "We don't yet know how to compare strings of different encodings\n");
- fprintf(stderr, "Let Dubner and Lowden of cobolworx know about this\n");
- abort();
+ encoding_left = encoding_right = iconv_CP1252_e;
}
-
retval = compare_strings( reinterpret_cast<char *>(left_location),
left_length,
left_all,
reinterpret_cast<char *>(right_location),
right_length,
right_all,
- encoding_left
- );
+ encoding_left,
+ encoding_right );
compare = true;
goto fixup_retval;
if( right_side->type == FldLiteralN)
{
+ right_value = get_float128(right_side, right_location);
// In order to do the comparision, we need the value from the
// literal to be the same flavor as the left side:
- // We need to replace any commas with periods
- static size_t size = 128;
- static char *buffer = static_cast<char *>(malloc(size));
- while( strlen(right_side->initial)+1 > size )
- {
- size *= 2;
- buffer = static_cast<char *>(malloc(size));
- }
- massert(buffer);
- strcpy(buffer, right_side->initial);
-
- if( __gg__decimal_point == ',' )
- {
- // We are operating in DECIMAL IS COMMA mode, so we need to
- // replace any commas with periods.
- char *p = strchr(buffer, ',');
- if(p)
- {
- *p = '.';
- }
- }
-
- // buffer[] now contains the right-side string we want to convert
- // to one of the floating-point types. We want them to be the
- // same size:
switch(left_side->capacity)
{
case 4:
{
_Float32 left_value4 = *PTRCAST(_Float32, left_location);
- _Float32 right_value4 = strtof(buffer, NULL);
+ _Float32 right_value4 = (_Float32)right_value;
retval = 0;
retval = left_value4 < right_value4 ? -1 : retval;
retval = left_value4 > right_value4 ? 1 : retval;
case 8:
{
_Float64 left_value8 = *PTRCAST(_Float64, left_location);
- _Float64 right_value8 = strtod(buffer, NULL);
+ _Float64 right_value8 = (_Float64)right_value;
retval = 0;
retval = left_value8 < right_value8 ? -1 : retval;
retval = left_value8 > right_value8 ? 1 : retval;
//_Float128 left_value = *(_Float128 *)left_location;
GCOB_FP128 left_value16;
memcpy(&left_value16, left_location, 16);
- GCOB_FP128 right_value16 = strtofp128(buffer, NULL);
+ GCOB_FP128 right_value16 = right_value;
retval = 0;
retval = left_value16 < right_value16 ? -1 : retval;
retval = left_value16 > right_value16 ? 1 : retval;
if( right_refmod )
{
+ if( (left_side->attr | right_side->attr) & hex_encoded_e )
+ {
+ encoding_left = encoding_right = iconv_CP1252_e;
+ }
retval = compare_strings( reinterpret_cast<char *>(left_location),
left_length,
left_all,
reinterpret_cast<char *>(right_location),
right_length,
right_all,
- left_side->encoding);
+ left_side->encoding,
+ right_side->encoding );
compare = true;
goto fixup_retval;
}
right_location,
right_length,
0);
-
+ size_t right_string_length = strlen(right_string);
if( encoding_formatted != encoding_left )
{
// The encodings are not the same. We need to convert the right_string
const char *converted = __gg__iconverter(encoding_formatted,
encoding_left,
right_string,
- strlen(right_string),
+ right_string_length,
&outsize);
memcpy(right_string, converted, outsize);
+ right_string_length = outsize;
}
// There is a tricky aspect to comparing an alphanumeric to
// considered a "pseudo-move", and the rule for moving a negative
// number to an alphanumeric is that negative signs get stripped off
- if( *left_location == charmap_left->mapped_character(ascii_plus)
- || *left_location == charmap_left->mapped_character(ascii_minus) )
+ cbl_char_t left_ch = charmap_left->getch(left_location, size_t(0));
+ if( left_ch == charmap_left->mapped_character(ascii_plus)
+ || left_ch == charmap_left->mapped_character(ascii_minus) )
{
- left_location += 1;
- left_length -= 1;
+ left_location += charmap_left->stride();
+ left_length -= charmap_left->stride();
}
- const char *right_fixed;
- if( *right_string == charmap_right->mapped_character(ascii_plus)
- || *right_string == charmap_right->mapped_character(ascii_minus) )
+ char *right_fixed;
+ cbl_char_t right_ch = charmap_right->getch(right_string, size_t(0));
+ if( right_ch == charmap_right->mapped_character(ascii_plus)
+ || right_ch == charmap_right->mapped_character(ascii_minus) )
{
- right_fixed = right_string + 1;
+ right_fixed = right_string + charmap_right->stride();
+ right_string_length -= charmap_right->stride();
}
else
{
right_fixed = right_string;
}
+ if( (left_side->attr | right_side->attr) & hex_encoded_e )
+ {
+ encoding_left = encoding_right = iconv_CP1252_e;
+ }
retval = compare_strings( reinterpret_cast<char *>(left_location),
left_length,
left_all,
right_fixed,
- strlen(right_fixed),
+ right_string_length,
right_all,
+ encoding_left,
encoding_left);
compare = true;
goto fixup_retval;
static for_sort_table sorter;
static int
-compare_two_records(unsigned char *range1, unsigned char *range2)
+compare_two_records(unsigned char *range1,
+ unsigned char *range2)
{
int retval = 0;
field2.data = range2
+ field2.offset
- sorter.base;
+ field1.encoding = field2.encoding = encoding_for_sort;
// We handle descending by swapping the data sources:
if( !sorter.ascending[i] )
next_record += record_size;
}
+ encoding_for_sort = table->encoding;
+
// Sort it
sort_contents(contents,
offsets,
unsigned char *qual_data,
int flag_bits)
{
- //fprintf(stderr, "CALLED WITH %s 0x%x\n", var->name, flag_bits);
-
if( flag_bits & JUST_ONCE_BIT && var->attr & initialized_e )
{
return;
&& var->type != FldLiteralA
&& var->type != FldLiteralN )
{
- //fprintf(stderr, "ABORTING on %2.2d %s %d\n", var->level, var->name, var->type);
- //abort();
var->data = static_cast<unsigned char *>(malloc(var->capacity));
}
initialize_program_state();
}
- char *local_initial = as_initial(var->initial);
-
- if( var->level == LEVEL88 )
- {
- // We need to convert the options to the var->encoding
-
- size_t buffer_size = 4;
- char *buffer = static_cast<char *>(malloc(buffer_size));
-
- size_t index = 0;
-
- const cblc_field_t *parent = var->parent;
- switch(parent->type)
- {
- case FldGroup:
- case FldAlphanumeric:
- {
- char *walker = local_initial;
- while(*walker)
- {
- static size_t first_size = MINIMUM_ALLOCATION_SIZE;
- static char *first = static_cast<char *>(malloc(first_size));
- static size_t last_size = MINIMUM_ALLOCATION_SIZE;
- static char *last = static_cast<char *>(malloc(last_size));
- if( strlen(walker)+1 > first_size )
- {
- first_size = strlen(walker)+1;
- first = static_cast<char *>(realloc(first, first_size));
- }
- if( (*walker & 0xFF) == 0xFF )
- {
- // I don't recall what 0xFF means, and I neglected to comment it
- // the first time through. Probably means numerical values follow.
- strcpy(first, walker);
- }
- else
- {
- strcpy(first, walker);
- __gg__convert_encoding( first,
- DEFAULT_SOURCE_ENCODING,
- var->encoding);
- }
- walker += strlen(first) + 1;
-
- if( strlen(walker)+1 > last_size )
- {
- last_size = strlen(walker)+1;
- last = static_cast<char *>(realloc(last, last_size));
- }
- if( (*walker & 0xFF) == 0xFF )
- {
- strcpy(last, walker);
- }
- else
- {
- __gg__convert_encoding( last,
- DEFAULT_SOURCE_ENCODING,
- var->encoding);
- }
- walker += strlen(last) + 1;
- while(index + strlen(first) + strlen(last) + 3 > buffer_size)
- {
- buffer_size *= 2;
- buffer = static_cast<char *>(realloc(buffer, buffer_size));
- }
- strcpy(buffer+index, first);
- index += strlen(first) + 1;
- strcpy(buffer+index, last);
- index += strlen(last) + 1;
- }
- buffer[index++] = 0;
- break;
- }
- }
- if( index > 0 )
- {
- buffer = static_cast<char *>(realloc(buffer, index));
- local_initial = buffer;
- }
- }
+ const char *local_initial = as_initial(var->initial);
// Next order of business: When the variable was allocated in
// parser_symbol_add(), only LEVEL 01 variables had memory allocated. All
switch( var->type )
{
case FldGroup:
+ {
+ if( var->initial )
+ {
+ memcpy(outer_location, var->initial, var->capacity);
+ }
+ break;
+ }
+
case FldAlphanumeric:
case FldAlphaEdited:
case FldNumericEdited:
case FldLiteralA:
{
- // Any initialization values were converted to single-byte-coding in the
- // right codeset during parser_symbol_add()
if( var->initial )
{
memcpy(outer_location, var->initial, var->capacity);
}
else
{
+ charmap_t *charmap = __gg__get_charmap(var->encoding);
if( !defaultbyte_in_play )
{
- charmap_t *charmap = __gg__get_charmap(var->encoding);
- memset( outer_location,
- charmap->mapped_character(ascii_space),
- capacity );
+ cbl_char_t initialization_character = ascii_space;
+ if( var->attr & linkage_e && __gg__local_init != NOT_A_CHARACTER )
+ {
+ initialization_character = __gg__local_init;
+ }
+ if( !(var->attr & linkage_e) && __gg__working_init != NOT_A_CHARACTER )
+ {
+ initialization_character = __gg__working_init;
+ }
+ charmap->memset(outer_location,
+ charmap->mapped_character(initialization_character),
+ capacity );
}
else
{
- memset( outer_location,
- defaultbyte,
- capacity );
+ charmap->memset(outer_location,
+ defaultbyte,
+ capacity );
}
}
break;
}
else
{
+ cbl_char_t init_zero = ascii_zero;
+ cbl_char_t init_plus = ascii_plus;
+
+ charmap_t *charmap = __gg__get_charmap(var->encoding);
if( !defaultbyte_in_play )
{
- charmap_t *charmap = __gg__get_charmap(var->encoding);
- memset( outer_location,
- charmap->mapped_character(ascii_zero),
- capacity );
+ charmap->memset( outer_location,
+ charmap->mapped_character(init_zero),
+ capacity );
if( (var->attr & signable_e) && (var->attr & separate_e) )
{
if( var->attr & leading_e )
{
- outer_location[0] = charmap->mapped_character(ascii_plus);
+ charmap->putch(charmap->mapped_character(init_plus),
+ outer_location,
+ size_t(0));
}
else
{
- outer_location[var->capacity-1] =
- charmap->mapped_character(ascii_plus);
+ charmap->putch(charmap->mapped_character(init_plus),
+ outer_location,
+ var->capacity-charmap->stride());
}
}
}
else
{
- memset( outer_location,
- defaultbyte,
- capacity );
+ charmap->memset(outer_location,
+ defaultbyte,
+ capacity );
}
}
break;
void
__gg__initialize_variable_clean(cblc_field_t *var, int flag_bits)
{
-// if( var->type == FldLiteralA )
-// {
-// fprintf(stderr, "BAZINGA!\n");
-// }
-
init_var_both( var,
var->data,
flag_bits);
memmove(to + (dest_length-count),
from,
count);
- memset(to, charmap->mapped_character(ascii_space), dest_length-count);
+ charmap->memset(to,
+ charmap->mapped_character(ascii_space),
+ dest_length-count);
}
}
else
memmove(to,
from,
count);
- memset( to + count,
- charmap->mapped_character(ascii_space),
- dest_length-count);
+ charmap->memset(to + count,
+ charmap->mapped_character(ascii_space),
+ dest_length-count);
}
}
}
}
+extern "C"
+void *
+__gg__memdup(const void *p, size_t size)
+ {
+ void *retval = nullptr;
+ if(size)
+ {
+ retval = malloc(size);
+ massert(retval);
+ memcpy(retval, p, size);
+ }
+ return retval;
+ }
+
static void
alpha_to_alpha_move(cblc_field_t *dest,
size_t dest_offset,
size_t outlength;
if(dest->encoding == source->encoding)
{
+ // we don't need to bother calling __gg__iconverter
outlength = source_size;
}
else
{
// Before calling the mover, we need to convert the source to the
// destination encoding:
- static size_t bufsize = 0;
- static char *buffer = nullptr;
-
- // Supposing we might someday want to convert to UCS-4, then we need four
- // output bytes for each input SBC character. This is a dumb way of
- // thinking about it. By rights we should be calculating the worst case
- // dest size as (source_size / min_bytes_per_source_position) times the
- // max_bytes_per_dest_position.
-
- // But that's work for another day. This is harmless, if perhaps a bit
- // wasteful of memory.
-
- size_t needed = 4 * source_size;
- if( needed > bufsize )
- {
- bufsize = needed;
- buffer = static_cast<char *>(realloc(buffer, bufsize));
- massert(buffer);
- }
-
source_location = __gg__iconverter( source->encoding,
dest->encoding,
source_location,
source_size,
&outlength);
}
+ char *duped = static_cast<char *>(__gg__memdup(source_location, outlength));
alpha_to_alpha_move_from_location(dest,
dest_offset,
dest_size,
- source_location,
+ duped,
outlength,
source_move_all);
+ free(duped);
}
extern "C"
__int128 value;
int rdigits;
+ charmap_t *charmap = __gg__get_charmap(fdest->encoding);
+ int stride = charmap->stride();
cbl_figconst_t source_figconst =
(cbl_figconst_t)(fsource->attr & FIGCONST_MASK);
+ int special_char = 0; // quiets cppcheck
+ if( source_figconst == low_value_e )
+ {
+ special_char = charmap->low_value_character();
+ }
+ else if( source_figconst == high_value_e )
+ {
+ special_char = charmap->high_value_character();
+ }
+ else if( source_figconst == quote_value_e )
+ {
+ special_char = charmap->quote_character();
+ }
+ else if( source_figconst == space_value_e )
+ {
+ special_char = charmap->mapped_character(ascii_space);
+ }
+ else if( source_figconst == zero_value_e )
+ {
+ special_char = charmap->mapped_character(ascii_zero);
+ }
+
cbl_field_type_t dest_type = (cbl_field_type_t)fdest->type;
cbl_field_type_t source_type = (cbl_field_type_t)fsource->type;
|| fdest->type == FldFloat )
)
{
- charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
-
// Regardless of what you see below, as time went on it became clear that
// high-value and low-value required special processing in order to cope
// with code. Or, at least, to cope with legacy tests.
* other than QUOTE or QUOTES, to a numeric item is an archaic feature of
* standard COBOL and its use should be avoided
*/
-
- int special_char = 0; // quiets cppcheck
- if( source_figconst == low_value_e )
- {
- special_char = charmap_dest->low_value_character();
- }
- else if( source_figconst == high_value_e )
- {
- special_char = charmap_dest->high_value_character();
- }
- else if( source_figconst == quote_value_e )
- {
- special_char = charmap_dest->quote_character();
- }
- else if( source_figconst == space_value_e )
- {
- special_char = charmap_dest->mapped_character(ascii_space);
- }
- memset( fdest->data + dest_offset,
- special_char,
- dest_size);
+ charmap->memset( fdest->data + dest_offset,
+ special_char,
+ dest_size);
}
else
{
case FldPacked:
case FldNumericBin5:
case FldGroup:
+ case FldLiteralA:
{
// This is a little bold, but non-alphabetics will never
// have the rjust_e or MOVE_ALL bits on, so it's safe
case FldAlphanumeric:
case FldNumericEdited:
case FldAlphaEdited:
+ case FldLiteralA:
// This is an ordinary alpha-to-alpha move:
- alpha_to_alpha_move(fdest,
- dest_offset,
- dest_size,
- fsource,
- source_offset,
- source_size,
- !!(source_flags & REFER_T_MOVE_ALL));
+ if( source_figconst )
+ {
+ charmap->memset( fdest->data + dest_offset,
+ special_char,
+ dest_size);
+ }
+ else
+ {
+ alpha_to_alpha_move(fdest,
+ dest_offset,
+ dest_size,
+ fsource,
+ source_offset,
+ source_size,
+ !!(source_flags & REFER_T_MOVE_ALL));
+ }
break;
case FldNumericDisplay:
// alphanumeric. We ignore any sign bit, and just
// move the characters:
- charmap_t *charmap = __gg__get_charmap(fdest->encoding);
-
size_t source_digits
- = fsource->digits
- + ( fsource->rdigits < 0
- ? -fsource->rdigits : 0) ;
+ = fsource->digits + ( fsource->rdigits < 0
+ ? -fsource->rdigits : 0) ;
// Pick up the absolute value of the source
value = __gg__binary_value_from_qualified_field(&rdigits,
if( !(fdest->attr & rjust_e) )
{
- min_length = std::min( source_digits,
+ min_length = std::min( source_digits*stride,
dest_size);
memmove(fdest->data + dest_offset, ach, min_length);
if( min_length < dest_size )
// min_length is smaller than dest_length, so we
// have to space-fill the excess bytes in the
// destination:
- memset( fdest->data + dest_offset + min_length,
- charmap->mapped_character(ascii_space),
- dest_size - min_length );
+ charmap->memset(fdest->data + dest_offset + min_length ,
+ charmap->mapped_character(ascii_space),
+ dest_size - min_length );
}
}
else
{
// Destination is right-justified, so things are
// slightly more complex
- if( source_digits >= dest_size )
+ if( source_digits*stride >= dest_size )
{
// We need to truncate the source data on the
// left:
memmove(
fdest->data + dest_offset,
- ach + (source_digits - dest_size),
+ ach + (source_digits*stride - dest_size),
dest_size );
}
else
// We need to move the shorty source string to
// the right side of the destination, and space-fill
// the prefix:
- memmove(fdest->data + dest_offset + (dest_size - source_digits),
+ memmove(fdest->data
+ + dest_offset + (dest_size - source_digits*stride),
ach,
- source_digits );
- memset( fdest->data + dest_offset,
- charmap->mapped_character(ascii_space),
- dest_size - source_digits);
+ source_digits*stride );
+ charmap->memset( fdest->data + dest_offset,
+ charmap->mapped_character(ascii_space),
+ dest_size - source_digits*stride);
}
}
}
}
else
{
- charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
char ach[128];
// Turn the integer source into a value:
if( fsource->attr & intermediate_e )
{
- charmap_t *charmap_src = __gg__get_charmap(fsource->encoding);
while(source_size > 1) // This ensures we leave one '0'
{
- if( *(pach+1) == '\0' )
+ cbl_char_t ch = charmap->getch(pach, stride);
+ if( ch == '\0' )
{
break;
}
- if( ((*pach)&0xFF) != charmap_src->mapped_character(ascii_0))
+ ch = charmap->getch(pach, size_t(0));
+ if( ch != charmap->mapped_character(ascii_0))
{
break;
}
- pach += 1;
+ pach += stride;
source_size -= 1;
}
}
if( !(fdest->attr & rjust_e) )
{
- min_length = std::min( source_size,
+ min_length = std::min( source_size*stride,
dest_size);
memmove(fdest->data+dest_offset, pach, min_length);
if( min_length < dest_size )
{
// min_length is smaller than dest_length, so we have to
// space-fill the excess bytes in the destination:
- memset( fdest->data+dest_offset + min_length,
- charmap_dest->mapped_character(ascii_space),
+ charmap->memset( fdest->data+dest_offset + min_length,
+ charmap->mapped_character(ascii_space),
dest_size - min_length );
}
}
{
// We need to move the shorty source string to the
// right side of the destination, and space-fill the prefix:
- memmove(fdest->data+dest_offset + (dest_size - source_size),
+ memmove(fdest->data+dest_offset +
+ (dest_size - source_size*stride),
pach,
- source_size );
- memset(fdest->data+dest_offset,
- charmap_dest->mapped_character(ascii_space),
- (dest_size - source_size));
+ source_size*stride );
+ charmap->memset(fdest->data+dest_offset,
+ charmap->mapped_character(ascii_space),
+ (dest_size - source_size*stride));
}
}
}
case FldIndex:
{
- charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
char ach[128];
// Turn the integer source into a value:
fsource,
source_offset,
source_size);
- sprintf(ach, "%lu", (unsigned long)value);
-
+ // Turn the integer value into a string:
+ __gg__binary_to_string_encoded(ach,
+ source_size,
+ value,
+ fdest->encoding);
char *pach = ach;
if( !(fdest->attr & rjust_e) )
{
// min_length is smaller than dest_length, so we have to
// space-fill the excess bytes in the destination:
- memset( fdest->data+dest_offset + min_length,
- charmap_dest->mapped_character(ascii_space),
+ charmap->memset( fdest->data+dest_offset + min_length,
+ charmap->mapped_character(ascii_space),
dest_size - min_length );
}
}
pach,
source_size );
memset(fdest->data+dest_offset,
- charmap_dest->mapped_character(ascii_space),
+ charmap->mapped_character(ascii_space),
(dest_size - source_size));
}
}
case FldNumericBinary:
{
- charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
switch( source_type )
{
case FldGroup:
// min_length is smaller than dest_length, so we have to
// space-fill the excess bytes in the destination:
memset( fdest->data+dest_offset + min_length,
- charmap_dest->mapped_character(ascii_space),
+ charmap->mapped_character(ascii_space),
dest_size - min_length );
}
fdest->attr &= ~FIGCONST_MASK;
{
// Bin5 and Index are treated with no truncation, as if they were
// trunc_bin_e. The other types aren't subject to truncation.
- charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
switch( source_type )
{
case FldGroup:
// min_length is smaller than dest_length, so we have to
// space-fill the excess bytes in the destination:
memset( fdest->data+dest_offset + min_length,
- charmap_dest->mapped_character(ascii_space),
+ charmap->mapped_character(ascii_space),
dest_size - min_length );
}
break;
case FldAlphaEdited:
{
- charmap_t *charmap_dest = __gg__get_charmap(fdest->encoding);
switch( source_type )
{
case FldGroup:
// min_length is smaller than dest_length, so we have to
// space-fill the excess bytes in the destination:
memset( fdest->data+dest_offset + min_length,
- charmap_dest->mapped_character(ascii_space),
+ charmap->mapped_character(ascii_space),
dest_size - min_length );
}
break;
display_string_length);
int fc_char = __gg__fc_char(fsource);
- if( fc_char > -1 )
+ if( fc_char != NOT_A_CHARACTER )
{
memset(display_string, fc_char, dest_size);
__gg__convert_encoding_length(display_string,
switch( source_type )
{
case FldAlphanumeric:
+ case FldGroup:
{
// Converting alphanumeric to float means first converting to
// ascii:
size_t *ascending,
int duplicates)
{
+ encoding_for_sort = workfile->encoding;
+
// We are going to read the records of workfile into memory. We keep offsets
// into the memory buffer, and then we'll sort those offsets according to the
// things they point to.
size_t bytes_read;
size_t bytes_to_write;
+ const charmap_t *charmap = __gg__get_charmap(workfile->encoding);
+
sv_suppress_eof_ec = true;
for(;;)
{
if( workfile->record_length )
{
int rdigits;
- bytes_read = (size_t) __gg__binary_value_from_field(
- &rdigits,
- workfile->record_length);
+ // The record length is reported in character positions:
+ bytes_read = charmap->stride() * (size_t) __gg__binary_value_from_field(
+ &rdigits,
+ workfile->record_length);
}
else
{
if( workfile->record_area_min != workfile->record_area_max
&& workfile->record_length )
{
+ // Set the number of bytes to write, remembering that record_length is
+ // in characters, not bytes:
__gg__int128_to_field(workfile->record_length,
- bytes_to_write,
+ bytes_to_write/charmap->stride(),
0,
truncation_e,
NULL);
// Then we will begin.
+ encoding_for_sort = workfile->encoding;
+
sorter.nkeys = nkeys;
sorter.keys = keys;
sorter.ascending = ascending;
{
// The prior winner is bigger than the current winner, which means that
// the input files were not in order. This is a run-time error.
-
exception_raise(ec_sort_merge_sequence_e);
abort();
}
free(prior_winner);
}
+typedef std::vector<cbl_char_t>::const_iterator char_it_c ;
+typedef std::vector<cbl_char_t>::iterator char_it ;
+
static const char *
funky_find( const char *piece,
const char *piece_end,
return retval;
}
+static char_it_c
+funky_find_wide( char_it_c needle,
+ char_it_c needle_end, // Actually end+1
+ char_it_c haystack,
+ char_it_c haystack_end, // Actually end+1
+ char_it_c notfound)
+ {
+ // We are looking for the needle in the haystack
+
+ char_it_c retval = notfound;
+
+ size_t length_of_piece = needle_end - needle;
+ if(length_of_piece == 0)
+ {
+ __gg__abort("funky_find_wide() length_of_piece shouldn't be zero");
+ }
+
+ haystack_end -= length_of_piece;
+
+ while( haystack <= haystack_end )
+ {
+ // Compare the memory at needle to the memory at haystack
+ if( memcmp( &(*needle),
+ &(*haystack),
+ length_of_piece*sizeof(cbl_char_t)) == 0 )
+ {
+ // They are the same; return where needle was found
+ retval = haystack;
+ break;
+ }
+ // Not found; move to the next location in the haystach
+ haystack += 1;
+ }
+ return retval;
+ }
+
static const char *
funky_find_backward(const char *piece,
const char *piece_end,
return retval;
}
+static char_it_c
+funky_find_wide_backward( char_it_c needle,
+ char_it_c needle_end, // Actually end+1
+ char_it_c haystack,
+ char_it_c haystack_end, // Actually end+1
+ char_it_c notfound)
+ {
+ // We are looking for the needle in the haystack
+
+ char_it_c retval = notfound;
+
+ size_t length_of_piece = needle_end - needle;
+ if(length_of_piece == 0)
+ {
+ __gg__abort("funky_find_wide_backward() length_of_piece shouldn't be zero");
+ }
+
+ haystack_end -= length_of_piece;
+
+ while( haystack <= haystack_end )
+ {
+ if( memcmp( &(*needle),
+ &(*haystack_end),
+ length_of_piece*sizeof(cbl_char_t)) == 0 )
+ {
+ // They are the same; return where needle was found
+ retval = haystack_end;
+ break;
+ }
+ // Not found; move to the next location in the haystack
+ haystack_end -= 1;
+ }
+ return retval;
+ }
+
typedef struct normalized_operand
{
// These are the characters of the string. When the field is NumericDisplay
// any leading or trailing +/- characters are removed, and any embedded
// minus bits are removed.
+
+ // In order for INSPECT to handle things like UTF-8, which often has
+ // multi-byte codepoints, and UTF-16, which sometimes has multi-pair
+ // codepoints we are going to convert everything to UTF-32 for internal
+ // calculations and searches.
std::string the_characters;
- size_t offset; // Usually zero. One when there is a leading sign.
+ std::vector<cbl_char_t>the_vectorxxxx;
+
+ // offset and length are maintained in characters, not bytes
+ size_t offset; // Usually zero. Increased by one for leading separate sign.
size_t length; // Usually the same as the original. But it is one less
- // // than the original when there is a trailing sign.
+ // // than the original when there is a trailing separate sign.
} normalized_operand;
typedef struct comparand
normalized_operand identifier_5; // The replacement, for FORMAT 2
const char *alpha; // The start location within normalized_id_1
const char *omega; // The end+1 location within normalized_id_1
+ char_it_c alpha_it; // The start location within normalized_id_1
+ char_it_c omega_it; // The end+1 location within normalized_id_1
size_t leading_count;
bool leading;
bool first;
if( field )
{
+ charmap_t *charmap = __gg__get_charmap(encoding);
+
+ // This is the old-style byte-based assumption
const unsigned char *data = field->data + field_o;
cbl_figconst_t figconst
= (cbl_figconst_t)(field->attr & FIGCONST_MASK);
retval.offset = 1;
}
}
- for( size_t i=retval.offset; i<retval.length; i++ )
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
{
- charmap_t *charmap = __gg__get_charmap(field->encoding);
// Because we are dealing with a NumericDisplay that might have
- // the minus bit turned on, we need to mask it off
+ // the minus bit turned on, we will to mask it off as we copy the
+ // input characters over to retval:
retval.the_characters += charmap->set_digit_negative(data[i], false);
}
}
// We are set up to create the_characters;
if( figconst == normal_value_e )
{
- for( size_t i=retval.offset; i<retval.length; i++ )
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
{
retval.the_characters += data[i];
}
}
else
{
- charmap_t *charmap = __gg__get_charmap(encoding);
char ch = charmap->figconst_character(figconst);
- for( size_t i=retval.offset; i<retval.length; i++ )
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
{
retval.the_characters += ch;
}
retval.offset = 0;
retval.length = 0;
}
+
+ if( field )
+ {
+ cbl_encoding_t source_encoding = field->encoding;
+ const charmap_t *charmap_source = __gg__get_charmap(source_encoding);
+ charmap_t *charmap = __gg__get_charmap(encoding);
+ int stride = charmap->stride();
+
+ const unsigned char *data = field->data + field_o;
+ cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
+ if( figconst == normal_value_e )
+ {
+ retval.offset = 0;
+ retval.length = field_s / stride;
+
+ if( field->type == FldNumericDisplay )
+ {
+ // The value is NumericDisplay, so we might need to adjust the offset
+ // and length:
+ if( field->attr & separate_e )
+ {
+ // Because the sign is a separate plus or minus, the length
+ // gets reduced by one:
+ retval.length = field_s - 1;
+ if( field->attr & leading_e )
+ {
+ // Because the sign character is LEADING, we increase the
+ // offset by one
+ retval.offset = 1;
+ }
+ }
+ }
+ // We are ready to convert from the input to UTF32
+ size_t converted_characters;
+ const char *converted = __gg__iconverter(source_encoding,
+ DEFAULT_32_ENCODING,
+ data+retval.offset * stride,
+ retval.length * stride,
+ &converted_characters);
+ // We are ready to copy the characters over:
+ for( size_t i=0; i<converted_characters; i+=width_of_utf32 )
+ {
+ // Because we are dealing with a NumericDisplay that might have
+ // the minus bit turned on, we will to mask it off as we copy the
+ // input characters over to retval:
+ cbl_char_t ch = charmap->getch(converted, i);
+ if( field->type == FldNumericDisplay )
+ {
+ if( charmap_source->is_like_ebcdic() )
+ {
+ // In EBCDIC, a flagged negative digit 0xF0 through 0xF9 becomes
+ // 0xD0 through 0xD9. Those represent the characters
+ // "}JKLMNOPQR", which, now that we are in UTF32 space, don't have
+ // the right bit pattern to be fixed with set_digit_negative().
+ // So, we fix it separately with this table: Note that location
+ // 0x7D, which is ASCII '{', becomes 0x30 '0'. See also that
+ // locations 0x4A through 0x52 become 0x31 through 0x39.
+ static const uint8_t fixit[256] =
+ {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x80, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x81, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x82, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x83, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x84, 0x49, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
+ 0x37, 0x38, 0x39, 0x53, 0x54, 0x55, 0x56, 0x57, 0x85, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x86, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x87, 0x79, 0x7a, 0x7b, 0x7c, 0x30, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x89, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0x8a, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0x8b, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0x8c, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0x8d, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0x8e, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x8f, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+ };
+ ch = fixit[ch & 0xFF];
+ }
+ else
+ {
+ ch = charmap->set_digit_negative(ch, false);
+ }
+ }
+ retval.the_vectorxxxx.push_back(ch);
+ }
+ }
+ else
+ {
+ // We need to fill the field with a figurative constant:
+ // We are set up to create the_characters;
+ charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+ char ch = charmap32->figconst_character(figconst);
+ for( size_t i=retval.offset; i<retval.length; i+=1 )
+ {
+ retval.the_characters += ch;
+ retval.the_vectorxxxx.push_back(ch);
+ }
+ }
+ }
+ else
+ {
+ // There is no field, so leave the_characters empty.
+ retval.offset = 0;
+ retval.length = 0;
+ }
+
return retval;
}
match_lengths( normalized_operand &id_target,
const normalized_operand &id_source)
{
+ // This routine gets called when id_source is a figurative constant and
+ // we need the target to be the same length as the source
+
char ch = id_target.the_characters[0];
id_target.the_characters.clear();
for(size_t i=0; i<id_source.length; i++)
{
id_target.the_characters += ch;
}
+
+ cbl_char_t wch = id_target.the_vectorxxxx[0];
+ id_target.the_vectorxxxx.clear();
+ for(size_t i=0; i<id_source.length; i++)
+ {
+ id_target.the_vectorxxxx.push_back(wch);
+ }
id_target.length = id_source.length;
}
the_alpha_and_omega(const normalized_operand &id_before,
const normalized_operand &id_after,
const char * &alpha,
- const char * &omega)
+ const char * &omega,
+ char_it_c &alpha_it,
+ char_it_c &omega_it,
+ char_it_c notfound)
{
/* The 2023 ISO description of the AFTER and BEFORE phrases of the INSPECT
statement is, in a word, garbled.
omega = found;
// If not found, we just leave omega alone.
}
+
+ char_it_c omega_found = funky_find_wide(id_before.the_vectorxxxx.begin(),
+ id_before.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound )
+ {
+ // We found id_before within alpha/omega, so reduce omega
+ // to the found location.
+ omega_it = omega_found;
+ }
}
if( id_after.length )
{
// We found id_after in the alpha/omega segment. We update alpha
// be the character after the id_after substring.
- alpha = found + (end-start);
+ alpha = found + (end-start);
+ }
+ else
+ {
+ // We didn't find the id_after string, so we set the alpha to be
+ // omega. That means that no tally or replace operation will take
+ // because no characters will qualify.
+ alpha = omega;
+ }
+
+ char_it_c omega_found = funky_find_wide(id_after.the_vectorxxxx.begin(),
+ id_after.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound)
+ {
+ // We found id_after in the alpha/omega segment. We update alpha
+ // be the character after the id_after substring.
+ alpha_it = omega_found + (end-start);
}
else
{
// We didn't find the id_after string, so we set the alpha to be
// omega. That means that no tally or replace operation will take
// because no characters will qualify.
- alpha = omega;
+ alpha_it = omega_it;
}
}
+
}
static void
the_alpha_and_omega_backward( const normalized_operand &id_before,
const normalized_operand &id_after,
const char * &alpha,
- const char * &omega)
+ const char * &omega,
+ char_it_c &alpha_it,
+ char_it_c &omega_it,
+ char_it_c notfound)
{
- /* Not unlike the_alpha_and_omega(), but for handling BACKWARD.
+ /* Like the_alpha_and_omega(), but for handling BACKWARD.
"xyzxyzBEFORExyzxyzAFTERxyzxyzxyzxyzBEFORExyzxyzAFTERxyzxyz"
^ ^
{
// We found id_before within id_1, so change alpha to the character just
// to the right of BEFORE. Otherwise, we will leave alpha alone, so that
- // it stays at the beginning of id_1
+ // it stays at the beginning of id_1. That's because if you can't find
+ // id_before, it's as if there were no BEFORE phrase.
alpha = found + id_before.length;
}
+
+ char_it_c omega_found = funky_find_wide_backward(id_before.the_vectorxxxx.begin(),
+ id_before.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound )
+ {
+ // We found id_before within id_1, so change alpha to the character just
+ // to the right of BEFORE. Otherwise, we will leave alpha alone, so that
+ // it stays at the beginning of id_1
+ alpha_it = omega_found + id_before.length;
+ }
}
if( id_after.length )
{
// If the AFTER isn't found, we need to adjust things so that nothing
// happens.
- omega = id_1;
+ omega = alpha;
+ }
+
+ char_it_c omega_found = funky_find_wide_backward(id_after.the_vectorxxxx.begin(),
+ id_after.the_vectorxxxx.end(),
+ alpha_it,
+ omega_it,
+ notfound );
+ if( omega_found != notfound)
+ {
+ // We found id_after in id_1. We update omega to be
+ // at that location.
+ omega_it = omega_found;
+ }
+ else
+ {
+ // If the AFTER isn't found, we need to adjust things so that nothing
+ // happens.
+ omega_it = alpha_it;
}
}
}
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
- the_alpha_and_omega_backward( normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
comparands.push_back(next_comparand);
break;
}
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
- the_alpha_and_omega_backward( normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
next_comparand.leading = true;
next_comparand.leading_count = 0;
comparands.push_back(next_comparand);
// We are now set up to accomplish the data flow described
// in the language specification. We loop through the
// the character positions in normalized_id_1:
- const char *leftmost = normalized_id_1.the_characters.c_str();
- const char *rightmost = leftmost + normalized_id_1.length;
- const char *the_end_of_the_world = rightmost;
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
+ char_it_c the_end_of_the_world = rightmost;
while( leftmost < rightmost )
{
+ size_t rightmost_delta = 0;
rightmost -= 1;
// We look at the rightmost position. If that position is within the
// alpha-to-omega qualified range, we check all possible matches:
for(size_t k=0; k<comparands.size(); k++)
{
- if( rightmost < comparands[k].alpha )
+ if( rightmost < comparands[k].alpha_it )
{
// This can't be a match, because rightmost is
// to the left of the comparand's alpha.
continue;
}
- if( rightmost + comparands[k].identifier_3.length > comparands[k].omega )
+ if( rightmost + comparands[k].identifier_3.length >
+ comparands[k].omega_it )
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
// of the comparand's omega
continue;
}
- if( rightmost + comparands[k].identifier_3.length > the_end_of_the_world )
+ if( rightmost + comparands[k].identifier_3.length >
+ the_end_of_the_world )
{
// This can't be a match, because the rightmost character of the
// comparand falls past the new edge of id_1 established by a prior
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m] != rightmost[m] )
+ if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
{
possible_match = false;
break;
if( comparands[k].leading )
{
if( rightmost + comparands[k].identifier_3.length
- == comparands[k].omega)
+ == comparands[k].omega_it)
{
// This means that the match here is just the latest of a
// string of LEADING matches that started at .omega
comparands[k].leading_count += 1;
match = true;
+ comparands[k].omega_it -= comparands[k].identifier_3.length;
+ the_end_of_the_world = rightmost;
+ rightmost_delta = comparands[k].identifier_3.length-1;
}
}
break;
// all of the possible matches from here leftward to the alpha have
// to be true as well:
- if( (rightmost - comparands[k].alpha )
+ if( (rightmost - comparands[k].alpha_it )
% comparands[k].identifier_3.length == 0 )
{
// The remaining number of characters is correct for a match.
// Assume a match until we learn otherwise:
match = true;
- const char *local_left = rightmost;
+ char_it_c local_left = rightmost;
local_left -= comparands[k].identifier_3.length;
- while( local_left >= comparands[k].alpha )
+ while( local_left >= comparands[k].alpha_it )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m]
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
!= local_left[m] )
{
// We have a mismatched character, so no trailing match is
// Bump the result counter
id_2_results[comparands[k].id_2_index].result += 1;
- // Because we are scanning from right to left, we have to drag
- // the goalpost along with us to ensure that following
- // comparisions don't spill over into the characters we just matched.
+ // We have a match here at rightmost, so we need to set the end of
+ // the world here
the_end_of_the_world = rightmost;
+ // Adjust rightmost by the additional characters in a BACKWARD
+ // LEADING search:
+ rightmost -= rightmost_delta;
break;
}
}
cblc_index += 1;
// normalize it, according to the language specification.
normalized_operand normalized_id_1
- = normalize_id(id1, id1_o, id1_s, id1->encoding);
+ = normalize_id(id1, id1_o, id1_s, id1->encoding);
std::vector<comparand> comparands;
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
the_alpha_and_omega(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
comparands.push_back(next_comparand);
break;
}
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
normalized_operand normalized_id_4_before
= normalize_id(id4_before, id4_before_o, id4_before_s, id1->encoding);
the_alpha_and_omega(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
next_comparand.leading = true;
next_comparand.leading_count = 0;
comparands.push_back(next_comparand);
// We are now set up to accomplish the data flow described
// in the language specification. We loop through the
// the character positions in normalized_id_1:
- const char *leftmost = normalized_id_1.the_characters.c_str();
- const char *rightmost = leftmost + normalized_id_1.length;
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
while( leftmost < rightmost )
{
for(size_t k=0; k<comparands.size(); k++)
{
- if( leftmost < comparands[k].alpha )
+ if( leftmost < comparands[k].alpha_it )
{
// This can't be a match, because leftmost is
// to the left of the comparand's alpha.
continue;
}
- if( leftmost + comparands[k].identifier_3.length > comparands[k].omega )
+ if( leftmost + comparands[k].identifier_3.length > comparands[k].omega_it )
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m] != leftmost[m] )
+ if( comparands[k].identifier_3.the_vectorxxxx[m] != leftmost[m] )
{
possible_match = false;
break;
// 2) leftmost / (length_of_comparand ) = current_count
//
// I get chills every time I look at that.
+
if( comparands[k].leading )
{
// So far, so good.
- size_t count = (leftmost - comparands[k].alpha)
+ size_t count = ((leftmost - comparands[k].alpha_it))
/ comparands[k].identifier_3.length;
if( count == comparands[k].leading_count )
{
// all of the possible matches from here to the omega have to be
// true as well:
- if( (comparands[k].omega-leftmost)
+ if( (comparands[k].omega_it-leftmost)
% comparands[k].identifier_3.length == 0 )
{
// The remaining number of characters is correct for a match.
// Assume a match until we learn otherwise:
match = true;
- const char *local_left = leftmost;
+ char_it_c local_left = leftmost;
local_left += comparands[k].identifier_3.length;
- while( local_left < comparands[k].omega )
+ while( match && local_left < comparands[k].omega_it )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m]
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
!= local_left[m] )
{
// We have a mismatched character, so no trailing match is
// Add our results to the identifier_2 values:
-
for(size_t i = 0; i<id_2_results.size(); i++)
{
int rdigits;
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
- the_alpha_and_omega_backward( normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
+
comparands.push_back(next_comparand);
break;
}
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
- the_alpha_and_omega_backward( normalized_id_4_before,
- normalized_id_4_after,
- next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
+ the_alpha_and_omega_backward(normalized_id_4_before,
+ normalized_id_4_after,
+ next_comparand.alpha,
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
next_comparand.leading = true;
next_comparand.leading_count = 0;
next_comparand.first = true;
}
}
- const char *leftmost = normalized_id_1.the_characters.c_str();
- const char *rightmost = leftmost + normalized_id_1.length;
- const char *the_end_of_the_world = rightmost;
+ // We can now look through normalized_id_1 and replace characters:
+
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
+ char_it_c the_end_of_the_world = rightmost;
while( leftmost < rightmost )
{
+ size_t rightmost_delta = 0;
+
rightmost -= 1;
// We look at the rightmost position. If that position is within the
// alpha-to-omega qualified range, we check all possible matches:
for(size_t k=0; k<comparands.size(); k++)
{
- if( rightmost < comparands[k].alpha )
+ if( rightmost < comparands[k].alpha_it )
{
// This can't be a match, because rightmost is
// to the left of the comparand's alpha.
continue;
}
- if( rightmost + comparands[k].identifier_3.length > comparands[k].omega )
+ if( rightmost + comparands[k].identifier_3.length > comparands[k].omega_it )
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m] != rightmost[m] )
+ if( comparands[k].identifier_3.the_vectorxxxx[m] != rightmost[m] )
{
possible_match = false;
break;
if( comparands[k].leading )
{
if( rightmost
- + comparands[k].identifier_3.length
- + comparands[k].leading_count
- == comparands[k].omega)
+ + comparands[k].identifier_3.length * (comparands[k].leading_count +1)
+ == comparands[k].omega_it)
{
// This means that the match here is just the latest of a
// string of LEADING matches that started at .omega
comparands[k].leading_count += 1;
match = true;
+ rightmost_delta = comparands[k].identifier_3.length-1;
}
}
break;
// all of the possible matches from here leftward to the alpha have
// to be true as well:
- if( (rightmost - comparands[k].alpha )
+ if( (rightmost - comparands[k].alpha_it )
% comparands[k].identifier_3.length == 0 )
{
// The remaining number of characters is correct for a match.
// Assume a match until we learn otherwise:
match = true;
- const char *local_left = rightmost;
+ char_it_c local_left = rightmost;
local_left -= comparands[k].identifier_3.length;
- while( local_left >= comparands[k].alpha )
+ while( local_left >= comparands[k].alpha_it )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m]
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
!= local_left[m] )
{
// We have a mismatched character, so no trailing match is
// with the characters from normalized_id_5
//fprintf(stderr, "Rule: %ld %p %s\n", k+1, rightmost, rightmost);
- size_t index = rightmost - normalized_id_1.the_characters.c_str();
+ size_t index = rightmost - normalized_id_1.the_vectorxxxx.begin();
for( size_t l = 0;
l < comparands[k].identifier_5.length;
l++ )
{
- char ch = comparands[k].identifier_5.
- the_characters[l];
- normalized_id_1.the_characters[index++] = ch;
+ cbl_char_t ch = comparands[k].identifier_5.
+ the_vectorxxxx[l];
+ normalized_id_1.the_vectorxxxx[index++] = ch;
}
the_end_of_the_world = rightmost;
-
+ rightmost -= rightmost_delta;
break;
}
}
}
// Here is where we take the characters from normalized_id_1 and put them
- // back into identifier_1. There is some special processing to make sure
- // an embedded sign in a NumericDisplay survives the processing.
+ // back into identifier_1.
+
+ charmap_t *charmap = __gg__get_charmap(id1->encoding);
+ // Wastefully prefill id_1 with spaces in case the processing resulted in a
+ // string shorter than the original. (There is always the possiblity that
+ // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
+ // this before calling __gg__converter, because both mapped_character and
+ // __gg__iconverter use the same static buffer.
unsigned char *id1_data = id1->data + id1_o;
- int index_dest = normalized_id_1.offset;
- if( id1->type == FldNumericDisplay )
- {
- for(size_t i=0; i<normalized_id_1.length; i++)
- {
- charmap_t *charmap = __gg__get_charmap(id1->encoding);
- id1_data[index_dest] = normalized_id_1.the_characters[i];
- if( charmap->is_digit_negative(normalized_id_1.the_characters[i]) )
- {
- id1_data[index_dest]
- = charmap->set_digit_negative(id1_data[index_dest], true);
- }
- else
- {
- id1_data[index_dest]
- = charmap->set_digit_negative(id1_data[index_dest], false);
- }
- index_dest += 1;
- }
- }
- else
- {
- for(size_t i=0; i<normalized_id_1.length; i++)
- {
- id1_data[index_dest++] = normalized_id_1.the_characters[i];
- }
- }
+ charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
+
+ // We've been working in UTF32; we convert back to the original id1 encoding.
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
+ id1->encoding,
+ normalized_id_1.the_vectorxxxx.data(),
+ normalized_id_1.length*width_of_utf32,
+ &bytes_converted) ;
+ // And move those characters into place in id_1:
+ memcpy(id1_data,
+ converted,
+ std::min(bytes_converted, id1_s));
+
return;
}
size_t id1_s = __gg__treeplet_1s[cblc_index];
cblc_index += 1;
-#if 0
- fprintf(stderr, "%s:%d: '%.*s' id1_o %zu, id1_s %zu\n", __func__, __LINE__,
- int(id1_s), (char*)id1->data, id1_o, id1_s);
-#endif
-
// normalize it, according to the language specification.
normalized_operand normalized_id_1
= normalize_id(id1, id1_o, id1_s, id1->encoding);
-#if 0
- fprintf(stderr, "%s:%d: normalized_id_1 '%s' offset %zu, length %zu\n", __func__, __LINE__,
- normalized_id_1.the_characters.c_str(),
- normalized_id_1.offset,
- normalized_id_1.length );
-#endif
-
+
std::vector<comparand> comparands;
// Pick up the count of operations:
next_comparand.omega
= next_comparand.alpha + normalized_id_1.length;
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
the_alpha_and_omega(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
comparands.push_back(next_comparand);
break;
}
normalized_operand normalized_id_4_after
= normalize_id(id4_after, id4_after_o, id4_after_s, id1->encoding);
+ next_comparand.alpha_it = normalized_id_1.the_vectorxxxx.begin();
+ next_comparand.omega_it = normalized_id_1.the_vectorxxxx.end();
+
the_alpha_and_omega(normalized_id_4_before,
normalized_id_4_after,
next_comparand.alpha,
- next_comparand.omega);
+ next_comparand.omega,
+ next_comparand.alpha_it,
+ next_comparand.omega_it,
+ normalized_id_1.the_vectorxxxx.end());
+
next_comparand.leading = true;
next_comparand.leading_count = 0;
next_comparand.first = true;
// We are now set up to accomplish the data flow described
// in the language specification. We loop through the
// the character positions in normalized_id_1:
- const char *leftmost
- = normalized_id_1.the_characters.c_str();
- const char *rightmost
- = leftmost + normalized_id_1.length;
+ char_it_c leftmost = normalized_id_1.the_vectorxxxx.begin();
+ char_it_c rightmost = leftmost + normalized_id_1.length;
while( leftmost < rightmost )
{
for(size_t k=0; k<comparands.size(); k++)
{
- if( leftmost < comparands[k].alpha )
+ if( leftmost < comparands[k].alpha_it )
{
// This can't be a match, because leftmost is
// to the left of the comparand's alpha.
continue;
}
if( leftmost + comparands[k].identifier_3.length
- > comparands[k].omega )
+ > comparands[k].omega_it )
{
// This can't be a match, because the rightmost
// character of the comparand falls to the right
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m]
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
!= leftmost[m] )
{
possible_match = false;
if( comparands[k].leading )
{
// So far, so good.
- size_t count = (leftmost - comparands[k].alpha)
+ size_t count = (leftmost - comparands[k].alpha_it)
/ comparands[k].identifier_3.length;
if( count == comparands[k].leading_count )
{
// all of the possible matches from here to the omega have to be
// true as well:
- if( (comparands[k].omega-leftmost)
+ if( (comparands[k].omega_it-leftmost)
% comparands[k].identifier_3.length == 0 )
{
// The remaining number of characters is correct for a match.
// Assume a match until we learn otherwise:
match = true;
- const char *local_left = leftmost;
+ char_it_c local_left = leftmost;
local_left += comparands[k].identifier_3.length;
- while( local_left < comparands[k].omega )
+ while( local_left < comparands[k].omega_it )
{
for(size_t m=0; m<comparands[k].identifier_3.length; m++)
{
- if( comparands[k].identifier_3.the_characters[m]
+ if( comparands[k].identifier_3.the_vectorxxxx[m]
!= local_left[m] )
{
// We have a mismatched character, so no trailing match is
// with the characters from normalized_id_5
size_t index = leftmost
- - normalized_id_1.the_characters.c_str();
+ - normalized_id_1.the_vectorxxxx.begin();
for( size_t l = 0;
l < comparands[k].identifier_5.length;
l++ )
{
char ch = comparands[k].identifier_5.
- the_characters[l];
- normalized_id_1.the_characters[index++] = ch;
+ the_vectorxxxx[l];
+ normalized_id_1.the_vectorxxxx[index++] = ch;
}
// Adjust the leftmost pointer to point to
// the rightmost character of the matched
}
// Here is where we take the characters from normalized_id_1 and put them
- // back into identifier_1. There is some special processing to make sure
- // an embedded sign in a NumericDisplay survives the processing.
+ // back into identifier_1.
+
+ charmap_t *charmap = __gg__get_charmap(id1->encoding);
+ // Wastefully prefill id_1 with spaces in case the processing resulted in a
+ // string shorter than the original. (There is always the possiblity that
+ // a UTF-8 or UTF-16 codeset pair got replaced with a single character.) Do
+ // this before calling __gg__converter, because both mapped_character and
+ // __gg__iconverter use the same static buffer.
unsigned char *id1_data = id1->data + id1_o;
- int index_dest = normalized_id_1.offset;
- if( id1->type == FldNumericDisplay )
- {
- for(size_t i=0; i<normalized_id_1.length; i++)
- {
- charmap_t *charmap = __gg__get_charmap(id1->encoding);
- id1_data[index_dest] = normalized_id_1.the_characters[i];
- if( charmap->is_digit_negative(normalized_id_1.the_characters[i]) )
- {
- id1_data[index_dest]
- = charmap->set_digit_negative(id1_data[index_dest], true);
- }
- else
- {
- id1_data[index_dest]
- = charmap->set_digit_negative(id1_data[index_dest], false);
- }
- index_dest += 1;
- }
- }
- else
- {
- for(size_t i=0; i<normalized_id_1.length; i++)
- {
- id1_data[index_dest++] = normalized_id_1.the_characters[i];
- }
- }
+ charmap->memset(id1_data, charmap->mapped_character(ascii_space), id1_s);
+
+ // We've been working in UTF32; we convert back to the original id1 encoding.
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter( DEFAULT_32_ENCODING,
+ id1->encoding,
+ normalized_id_1.the_vectorxxxx.data(),
+ normalized_id_1.length*width_of_utf32,
+ &bytes_converted) ;
+ // And move those characters into place in id_1:
+ memcpy(id1_data,
+ converted,
+ std::min(bytes_converted, id1_s));
return;
}
-static char *
-normalize_for_inspect_format_4( size_t *dest_size,
- const cblc_field_t *var,
- size_t var_offset,
- size_t var_size,
- cbl_encoding_t encoding)
- {
- // Returns a malloced pointer; the caller needs to free it.
- char *retval;
- retval = static_cast<char *>(malloc(var_size+1));
+static std::u32string
+normalize_for_inspect_format_4(const cblc_field_t *var,
+ size_t var_offset,
+ size_t var_size,
+ cbl_encoding_t source_encoding)
+ {
+ std::u32string retval;
if(var)
{
+ const charmap_t *charmap_var = __gg__get_charmap(source_encoding);
+ charmap_t *charmap32 = __gg__get_charmap(DEFAULT_32_ENCODING);
+
cbl_figconst_t figconst =
static_cast<cbl_figconst_t>(var->attr & FIGCONST_MASK);
+ // We have a corner case to deal with:
+ if( strcmp(var->name, "NULLS") == 0 )
+ {
+ figconst = null_value_e;
+ }
+
if( figconst )
{
// Build up an var_size array of figconst characters
- charmap_t *charmap = __gg__get_charmap(encoding);
- char figchar = '\0';
+ cbl_char_t figchar = '\0';
switch( figconst )
{
case low_value_e :
- figchar = charmap->low_value_character();
+ figchar = charmap32->low_value_character();
break;
case zero_value_e :
- figchar = charmap->mapped_character(ascii_0);
+ figchar = charmap32->mapped_character(ascii_0);
break;
case space_value_e :
- figchar = charmap->mapped_character(ascii_space);
+ figchar = charmap32->mapped_character(ascii_space);
break;
case quote_value_e :
- figchar = charmap->quote_character();
+ figchar = charmap32->quote_character();
break;
case high_value_e :
- figchar = charmap->high_value_character();
+ {
+ if( __gg__high_value_character == DEFAULT_HIGH_VALUE_8 )
+ {
+ // See the comments where these constants are defined.
+ if(charmap_var->stride() == 1)
+ {
+ if(charmap_var->is_like_ebcdic())
+ {
+ // This maps back to 0xFF in CP1140
+ figchar = EBCDIC_HIGH_VALUE_32;
+ }
+ else
+ {
+ // This maps back to 0xFF in CP1252
+ figchar = ASCII_HIGH_VALUE_32;
+ }
+ }
+ else if(charmap_var->stride() == 2)
+ {
+ figchar = UTF16_HIGH_VALUE_32;
+ }
+ else
+ {
+ figchar = UTF32_HIGH_VALUE_32;
+ }
+ }
+ else
+ {
+ figchar = charmap32->mapped_character(__gg__high_value_character);
+ }
break;
+ }
case null_value_e:
break;
default:
abort();
break;
}
- memset(retval, figchar, var_size);
- retval[var_size] = '\0';
+ retval.push_back(figchar);
}
else
{
- // It's not a figurative constant, so convert var to the target encoding.
- memcpy(retval,
- __gg__iconverter(var->encoding,
- encoding,
- PTRCAST(char, var->data) + var_offset,
+ // It's not a figurative constant, so convert var to UTF32.
+ size_t converted_bytes;
+ const char *converted = __gg__iconverter(
+ var->encoding,
+ DEFAULT_32_ENCODING,
+ var->data + var_offset,
var_size,
- dest_size),
- var_size);
- retval[var_size] = '\0';
+ &converted_bytes);
+ void *duped = __gg__memdup(converted, converted_bytes);
+ for(size_t i=0; i<converted_bytes; i+=width_of_utf32)
+ {
+ cbl_char_t ch = charmap32->getch(duped, i);
+ retval.push_back(ch);
+ }
+ free(duped);
}
}
- else
- {
- retval = nullptr;
- *dest_size = 0;
- }
return retval;
}
// constant.
cbl_figconst_t figconst_original =
- static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
+ static_cast<cbl_figconst_t>(original->attr & FIGCONST_MASK);
cbl_figconst_t figconst_replacement =
- static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
+ static_cast<cbl_figconst_t>(replacement->attr & FIGCONST_MASK);
int figswitch = (figconst_original ? 2 : 0) + (figconst_replacement ? 1 : 0);
switch( figswitch )
{
// Neither are figconst; we leave the sizes alone
break;
case 1:
- // Only replacement is figconst, so we make its size the same as the
- // original. This will cause CONVERTING "ABC" TO ZERO to be the same as
+ // Only replacement is figconst, so we make its size -1
+ // This will cause CONVERTING "ABC" TO ZERO to be the same as
// CONVERTING "ABC" TO "000"
- replacement_size = original_size;
+ replacement_size = (size_t)(-1LL);
break;
case 2:
// Only original is figconst. Set the size to one. (This is necessary
after_size = 1;
}
- size_t psz_input_size ;
- size_t psz_original_size ;
- size_t psz_replacement_size;
- size_t psz_after_size ;
- size_t psz_before_size ;
-
bool all = (replacement_size == (size_t)(-1LL));
if( all )
{
// A replacement_size of -1 means that the statement is something like
// INSPECT XYZ CONVERTING "abcxyz" to ALL "?" That means replacement is
// a single character. We need to convert it to the target encoding.
- replacement_size = 1;
+ const charmap_t * charmap = __gg__get_charmap(input->encoding);
+ replacement_size = charmap->stride();
}
- char *psz_input = normalize_for_inspect_format_4(&psz_input_size , input , input_offset , input_size , input->encoding);
- char *psz_original = normalize_for_inspect_format_4(&psz_original_size , original , original_offset , original_size , input->encoding);
- char *psz_replacement = normalize_for_inspect_format_4(&psz_replacement_size, replacement, replacement_offset, replacement_size, input->encoding);
- char *psz_after = normalize_for_inspect_format_4(&psz_after_size , after , after_offset , after_size , input->encoding);
- char *psz_before = normalize_for_inspect_format_4(&psz_before_size , before , before_offset , before_size , input->encoding);
+ std::u32string str_input = normalize_for_inspect_format_4(input , input_offset , input_size , input->encoding);
+ std::u32string str_original = normalize_for_inspect_format_4(original , original_offset , original_size , input->encoding);
+ std::u32string str_replacement = normalize_for_inspect_format_4(replacement, replacement_offset, replacement_size, input->encoding);
+ std::u32string str_after = normalize_for_inspect_format_4(after , after_offset , after_size , input->encoding);
+ std::u32string str_before = normalize_for_inspect_format_4(before , before_offset , before_size , input->encoding);
if( all )
{
- // We now expand the single-byte replacement to be the same length as
+ // We now expand the single-character replacement to be the same length as
// original.
- psz_replacement_size = psz_original_size;
- psz_replacement = static_cast<char *>(realloc(psz_replacement, psz_replacement_size));
- memset(psz_replacement, psz_replacement[0], psz_replacement_size);
+ cbl_char_t ch = str_replacement[0];
+ str_replacement.clear();
+ for(size_t i=0; i<str_original.size(); i++)
+ {
+ str_replacement.push_back(ch);
+ }
}
- // Use a simple map to make this O(N), rather than an O(N-squared),
+ // Use a map to make this O(N), rather than an O(N-squared),
// computational complexity
- static const unsigned char map_init[256] =
- {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
- 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
- 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
- 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
- 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
- 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
- 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
- 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
- };
- unsigned char map[256];
-
- // Initialize the map to a one-to-one correspondence.
- memcpy(map, map_init, 256);
+ std::unordered_map<cbl_char_t, cbl_char_t>map;
+ typedef std::unordered_map<cbl_char_t, cbl_char_t>::const_iterator map_it_t ;
// The rule is, if the same character appears more than once in the
// original (which is identifier-6), then the first occurrence of the
// matching character in replacement is used. So, we create the map
// backwards. The one closest to zero will win.
- for(size_t i=original_size-1; i<original_size; i--)
+ for(size_t i=str_original.size()-1; i<str_original.size(); i--)
{
- map[ (unsigned char )psz_original[i] ] = (unsigned char )psz_replacement[i];
+ map[str_original[i]] = str_replacement[i];
}
- char *pstart = NULL;
- const char *pend = NULL;
- if( backward )
+ size_t leftmost_i; // Leftmost index to replace at.
+ size_t rightmost_i; // Rightmost+1 index to replace at.
+
+ if( !backward )
{
- if( before_size )
+ // This is a forward conversion. We look for the first instance
+ // of str_after from the left. And then we look for the first instance
+ // of str_before after that. When there is no str_before, we move the
+ // rightmost limit to the end of str_input, as if there were no BEFORE
+ // phrase:
+
+ if( str_after.empty() )
+ {
+ // There is no AFTER phrase, so we start from the left.
+ leftmost_i = 0;
+ }
+ else
{
- size_t nfound = std::string(psz_input).rfind(psz_before);
- if( nfound == std::string::npos )
+ size_t nfound = str_input.find(str_after);
+ if( nfound != std::u32string::npos )
{
- // The BEFORE string isn't in the input, so we will scan from
- // the leftmost character
- pstart = psz_input;
+ // Move the left limit to one character past the found element
+ leftmost_i = nfound + str_after.size();
}
else
{
- pstart = psz_input + nfound;
- if( !pstart )
- {
- pstart = psz_input;
- }
- pstart += before_size;
+ // We didn't find the after phrase, so we move the left limit to the
+ // end of input, which means nothing will be replaced
+ leftmost_i = str_input.size();
}
}
- else
+
+ // At this point, leftmost_i has been set to something. Look for the
+ // BEFORE phrase somewhere to the right of it:
+
+ if( str_before.empty() )
{
- pstart = psz_input;
+ // There is no BEFORE phrase, so set rightmost to the end of the input
+ rightmost_i = str_input.size();
}
-
- if( after_size )
+ else
{
- size_t nfound = std::string(psz_input).rfind(psz_after);
- if( nfound == std::string::npos )
+ // Look for BEFORE to the right of leftmost_i:
+ size_t nfound = str_input.find(str_before, leftmost_i);
+ if( nfound != std::u32string::npos )
{
- nfound = input_size;
+ // We found the BEFORE phrase.
+ rightmost_i = nfound;
+ }
+ else
+ {
+ // We didn't find the BEFORE phrase; IOS says to treat this situation
+ // as if there were no BEFORE phrase
+ rightmost_i = str_input.size();
}
- pend = psz_input + nfound;
- }
- if( !pend )
- {
- pend = psz_input+input_size;
}
}
else
{
- if( after_size )
+ // We are doing a BACKWARD conversion. So, we look for the AFTER phrase
+ // and use that to establish the rightmost limit. And we look for the
+ // BEFORE to the left of AFTER phrase and use that to establish the
+ // leftmost limit
+
+ if( str_after.empty() )
{
- pstart = strstr(psz_input, psz_after);
+ // There is no AFTER phrase, so we set the rightmost limit to the end
+ // of the input:
+ rightmost_i = str_input.size();
}
- if( !pstart )
+ else
{
- pstart = psz_input;
+ // Start from the right and look for AFTER
+ size_t nfound = str_input.rfind(str_after, str_input.size());
+ if( nfound != std::u32string::npos )
+ {
+ // We found str_after, so its location becomes rightmost
+ rightmost_i = nfound;
+ }
+ else
+ {
+ // We didn't find str_after, so we move rightmost all the way to the
+ // left, so that nothing will ever be found.
+ rightmost_i = 0;
+ }
}
- pstart += after_size;
-
- if( before_size )
+ // rightmost_i has been established, so now look for BEFORE to the left
+ // of it
+ if( str_before.empty() )
{
- pend = strstr(psz_input, psz_before);
+ // There is no str_before, so the left limit is all the way to the left
+ leftmost_i = 0;
}
- if( !pend )
+ else
{
- pend = psz_input + input_size;
+ size_t nfound = str_input.rfind(str_before, rightmost_i);
+ if( nfound != std::u32string::npos )
+ {
+ // We found BEFORE, so we put the left limit just to the right of
+ // where we found it:
+ leftmost_i = nfound + str_before.size();
+ }
+ else
+ {
+ // Not finding the BEFORE phrase is the same as the BEFORE phrase
+ // not having been specified:
+ leftmost_i = 0;
+ }
}
}
-
- while(pstart && pstart < pend)
+ // leftmost_i and rightmost_i have been established. Do the conversion of
+ // characters inside those limits:
+ for(size_t i=leftmost_i; i<rightmost_i; i++)
{
- *pstart = map[(unsigned char)*pstart];
- pstart += 1;
+ cbl_char_t ch = str_input[i];
+ map_it_t cvt = map.find(ch);
+ if( cvt != map.end() )
+ {
+ str_input[i] = cvt->second;
+ }
}
- memcpy(input->data+input_offset, psz_input, input_size);
+ // We now take the converted str_input, and put it back into id_1:
- free(psz_input );
- free(psz_original );
- free(psz_replacement );
- free(psz_after );
- free(psz_before );
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
+ input->encoding,
+ str_input.data(),
+ str_input.size()*width_of_utf32,
+ &bytes_converted) ;
+
+ // And move those characters into place in input:
+ memcpy(input->data + input_offset,
+ converted,
+ std::min(bytes_converted, input_size));
}
static void
// Get the charmap after the move, because it can mess with the
// static 'to' buffer.
charmap_t *charmap = __gg__get_charmap(field->encoding);
- memset(to, charmap->mapped_character(ascii_space), dest_length-count);
+ charmap->memset(to, charmap->mapped_character(ascii_space), dest_length-count);
}
else
{
brute_force_trim(char *str, cbl_encoding_t encoding)
{
charmap_t *charmap = __gg__get_charmap(encoding);
+ int stride = charmap->stride();
char *retval = str;
- while( *retval == charmap->mapped_character(ascii_space) )
+
+ while( charmap->getch(retval, size_t(0))
+ == charmap->mapped_character(ascii_space) )
{
- retval += 1;
+ retval += stride;
}
- char *p = retval + strlen(retval)-1;
- while( p > retval && *p == charmap->mapped_character(ascii_space) )
+ char *p = retval + strlen(retval)-stride;
+ while( p > retval
+ && ( charmap->getch(p, size_t(0))
+ == charmap->mapped_character(ascii_space)) )
{
- *p-- = NULLCH;
+ charmap->putch(NULLCH, p, size_t(0));
+ p -= stride;
}
return retval;
}
size_t index_cblc = 0 ;
// Pick up the target
- const cblc_field_t *tgt = ref[index_cblc];
+ const cblc_field_t *tgt = ref[index_cblc];
- // Pick up the target encoding, which we assume controls all the parameters
+ // Pick up the target encoding, which according to the ISO specification
+ // controls all the parameters.
cbl_encoding_t tgt_encoding = tgt->encoding;
charmap_t *charmap = __gg__get_charmap(tgt_encoding);
-
- char figlow[2] = {(char)__gg__low_value_character, 0x00};
- char fighigh[2] = {(char)__gg__high_value_character, 0x00};
- char figzero[2] = {(char)charmap->mapped_character(ascii_zero), 0x00};
- char figquote[2] = {(char)charmap->mapped_character(__gg__quote_character), 0x00};
- char figspace[2] = {(char)charmap->mapped_character(ascii_space), 0x00};
+ int stride = charmap->stride();
// Pick up the rest of the parameters
size_t tgt_o = ref_o[index_cblc];
size_t tgt_s = ref_s[index_cblc];
index_cblc += 1;
+
char *dest = reinterpret_cast<char *>(tgt->data + tgt_o);
- ssize_t dest_length = tgt_s;
+ size_t dest_length = tgt_s/stride;
// Skip over the index of POINTER:
index_cblc += 1;
// Pick up the pointer, if any
- ssize_t pointer = 0;
+ size_t pointer = 0;
+ int overflow = 0;
if( ref[INDEX_OF_POINTER] )
{
int rdigits;
- pointer = (size_t)__gg__binary_value_from_qualified_field(
+ int p = (size_t)__gg__binary_value_from_qualified_field(
&rdigits,
ref [INDEX_OF_POINTER],
ref_o[INDEX_OF_POINTER],
ref_s[INDEX_OF_POINTER]
);
- pointer -= 1;
+ if( p<0 )
+ {
+ overflow = 1;
+ }
+ pointer = p - 1;
}
- int overflow = 0;
-
// Make sure that the destination pointer is within the destination
- if( pointer >= 0 || pointer < dest_length )
+ if( pointer < dest_length )
{
// We are go for looping through identifier-2 values:
for( size_t i=0; i<N; i++ )
{
+ // Pick up the number of M identifier-1 values for this list of
+ // identifier-2 values:
size_t M = integers[index_int++];
// Pick up the identifier_2 DELIMITED BY value
- const cblc_field_t *id2 = ref[index_cblc];
- size_t id2_o = ref_o[index_cblc];
- size_t id2_s = ref_s[index_cblc];
+ std::u32string str_id2 = normalize_for_inspect_format_4(
+ ref[index_cblc],
+ ref_o[index_cblc],
+ ref_s[index_cblc],
+ tgt_encoding);
index_cblc += 1;
- char *piece;
- const char *piece_end;
- cbl_figconst_t figconst = (cbl_figconst_t) ( id2
- ? (id2->attr & FIGCONST_MASK)
- : 0 );
- switch(figconst)
- {
- case low_value_e:
- piece = figlow;
- piece_end = piece + 1;
- break;
- case zero_value_e:
- piece = figzero;
- piece_end = piece + 1;
- break;
- case space_value_e:
- piece = figspace;
- piece_end = piece + 1;
- break;
- case quote_value_e:
- piece = figquote;
- piece_end = piece + 1;
- break;
- case high_value_e:
- piece = fighigh;
- piece_end = piece + 1;
- break;
- default:
- piece = id2 ? reinterpret_cast<char *>(id2->data + id2_o) : NULL;
- piece_end = id2 ? piece + id2_s : NULL;
- break;
- }
-
for(size_t j=0; j<M; j++)
{
- // Pick up the next identifier-1 source string:
- const cblc_field_t *id1 = ref[index_cblc];
- size_t id1_o = ref_o[index_cblc];
- size_t id1_s = ref_s[index_cblc];
+ // Pick up the next id-1 source string for the current id-2 delimiter
+ std::u32string str_id1 = normalize_for_inspect_format_4(
+ ref[index_cblc],
+ ref_o[index_cblc],
+ ref_s[index_cblc],
+ tgt_encoding);
index_cblc += 1;
- const char *whole = id1 ? reinterpret_cast<char *>(id1->data + id1_o): NULL ;
- const char *whole_end = id1 ? whole + id1_s : NULL;
-
- // As usual, we need to cope with figurative constants:
- figconst = (cbl_figconst_t) ( id1 ? (id1->attr & FIGCONST_MASK) : 0 );
- switch( figconst )
+ size_t nfound;
+ if( str_id2.size() == 0 )
{
- case low_value_e:
- whole = figlow;
- whole_end = whole + 1;
- break;
- case zero_value_e:
- whole = figzero;
- whole_end = whole + 1;
- break;
- case space_value_e:
- whole = figspace;
- whole_end = whole + 1;
- break;
- case quote_value_e:
- whole = figquote;
- whole_end = whole + 1;
- break;
- case high_value_e:
- whole = fighigh;
- whole_end = whole + 1;
- break;
- default:
- break;
+ // No given delimiter means DELIMITED BY SIZE
+ nfound = str_id1.size();
}
-
- if(piece)
+ else
{
- const char *found = funky_find( piece, piece_end,
- whole, whole_end);
- if(found)
+ // We have an id2, so we look for it inside id1
+ nfound = str_id1.find(str_id2);
+ if( nfound == std::u32string::npos )
{
- whole_end = found;
+ nfound = str_id1.size();
}
}
- while(whole < whole_end)
+
+
{
- if(pointer >= dest_length)
+ // We have found id2 inside id1 at location nfound.
+
+ // Convert the UTF32 to the original encoding:
+ size_t bytes_converted;
+ char *converted = __gg__miconverter(DEFAULT_32_ENCODING,
+ tgt_encoding,
+ str_id1.data(),
+ nfound*width_of_utf32,
+ &bytes_converted );
+ size_t k = 0;
+ while(k < nfound)
{
- overflow = 1;
- break;
+ if( pointer >= dest_length )
+ {
+ overflow = 1;
+ break;
+ }
+ cbl_char_t ch = charmap->getch(converted, k*stride);
+ charmap->putch(ch, dest, pointer*stride);
+ k += 1;
+ pointer += 1;
}
- dest[pointer++] = *whole++;
+ free(converted);
}
if( overflow )
{
display_string,
conversion_length,
&outlength);
+ // Trim off the trailing null, if present.
+ outlength = strlen(converted);
write(file_descriptor,
converted,
outlength);
str,
length,
&outlength);
+ // Trim off trailing NUL, if present.
+ outlength = strlen(converted);
write( file_descriptor,
converted,
outlength);
}
}
-/*
- * Because this variable is static, the contructor runs before main and is
- * guaranted to run.
- */
-static class rt_encoding_t
- {
- const char *ctype, *lc_ctype;
- public:
- rt_encoding_t() : ctype( setlocale(LC_CTYPE, "") )
- {
- lc_ctype = nl_langinfo(CODESET);
- // Let's learn what the computer is using for the console:
- // We need to establish the codeset used by the system console:
- __gg__console_encoding = use_locale();
-
- if( getenv("CODESET") )
- {
- fprintf(stderr, "%s:%d: ctype=%s,lc_ctype=%s\n", __func__, __LINE__,
- ctype? ctype : "error" , lc_ctype);
- }
- }
- cbl_encoding_t use_locale() const
- {
- auto encoding = strstr(ctype, "UTF-8") ?
- iconv_UTF_8_e : __gg__encoding_iconv_type(lc_ctype);
- if( getenv("CODESET") )
- {
- fprintf(stderr, "%s:%d: console encoding is '%s'\n", __func__, __LINE__,
- __gg__encoding_iconv_name(encoding) );
- }
- return encoding;
- }
- } rt_encoding;
-
extern "C"
void
__gg__onetime_initialization( )
is_numeric_display_numeric(cblc_field_t *field, size_t offset, size_t size)
{
charmap_t *charmap = __gg__get_charmap(field->encoding);
+ int stride = charmap->stride();
int retval = 1;
bool signable = !!(field->attr & signable_e);
if( leading && separate && signable )
{
// First character must be +/-
+ cbl_char_t ch = charmap->getch(digits, size_t(0));
if( digits < digits_e
- || ( *digits != charmap->mapped_character(ascii_plus)
- && *digits != charmap->mapped_character(ascii_minus)) )
+ || ( ch != charmap->mapped_character(ascii_plus)
+ && ch != charmap->mapped_character(ascii_minus)) )
{
retval = 0;
}
- digits += 1;
+ digits += stride;
}
if( !leading && separate && signable )
{
// Last character must be +/-
- digits_e -= 1;
+ digits_e -= stride;
+ cbl_char_t ch = charmap->getch(digits_e, size_t(0));
if( digits < digits_e
- || ( *digits_e != charmap->mapped_character(ascii_plus)
- && *digits_e != charmap->mapped_character(ascii_minus)) )
+ || ( ch != charmap->mapped_character(ascii_plus)
+ && ch != charmap->mapped_character(ascii_minus)) )
{
retval = 0;
}
if( leading && !separate && signable )
{
- // The first character is allowed to have a sign bit.
+ // The first character is allowed to have a sign bit. Let's make sure that
+ // making that first digit unsigned leaves us with zero through nine:
if( digits < digits_e )
{
- unsigned char first_char = (unsigned char)*digits;
+ cbl_char_t first_char = charmap->getch(digits, size_t(0));
first_char = charmap->set_digit_negative(first_char, false);
if( first_char < charmap->mapped_character(ascii_0)
|| first_char > charmap->mapped_character(ascii_9))
if( digits < digits_e )
{
digits_e -= 1;
- unsigned char final_char = (unsigned char)*digits_e;
+ cbl_char_t final_char = charmap->getch(digits, size_t(0));
final_char = charmap->set_digit_negative(final_char, false);
if( final_char<charmap->mapped_character(ascii_0)
|| final_char>charmap->mapped_character(ascii_9) )
{
retval = 0;
break;
- }
- nybble += 1;
+ }
+ nybble += 1;
+ }
+ return retval;
+ }
+
+static int
+is_alpha_a_number(const cblc_field_t *field,
+ size_t offset,
+ size_t size)
+ {
+ charmap_t *charmap = __gg__get_charmap(field->encoding);
+ cbl_char_t mapped_0 = charmap->mapped_character(ascii_0);
+ cbl_char_t mapped_9 = charmap->mapped_character(ascii_9);
+ int retval = 1;
+ size_t i = offset;
+ while(i < size)
+ {
+ cbl_char_t ch = charmap->getch(field->data, &i);
+ if( (ch < mapped_0)
+ || (ch > mapped_9) )
+ {
+ retval = 0;
+ break;
+ }
+ }
+ return retval;
+ }
+
+static int
+classify_numeric_type(cblc_field_t *field,
+ size_t offset,
+ size_t size)
+ {
+ int retval = 1;
+ switch( field->type )
+ {
+ case FldNumericEdited:
+ retval = is_numeric_edited_numeric(field, offset, size);
+ break;
+ case FldNumericDisplay:
+ retval = is_numeric_display_numeric(field, offset, size);
+ break;
+ case FldPacked:
+ retval = is_packed_numeric(field, offset, size);
+ break;
+ case FldGroup:
+ case FldAlphanumeric:
+ case FldAlphaEdited:
+ retval = is_alpha_a_number(field, offset, size);
+ break;
+
+ case FldNumericBinary:
+ case FldNumericBin5:
+ // These need to checked for fitting into field->digits
+ break;
+
+ default:
+ fprintf(stderr,
+ "We need code for %s numeric type %d\n",
+ field->name,
+ field->type);
+ abort();
+ break;
}
return retval;
}
static int
-is_alpha_a_number(const cblc_field_t *field,
- size_t offset,
- size_t size)
+classify_alphabetic_type( const cblc_field_t *field,
+ size_t offset,
+ size_t size,
+ int (checker)( std::wint_t ch ))
{
- charmap_t *charmap = __gg__get_charmap(field->encoding);
- int mapped_0 = charmap->mapped_character(ascii_0);
- int mapped_9 = charmap->mapped_character(ascii_9);
int retval = 1;
- const unsigned char *bytes = (field->data + offset);
- for( size_t i=0; i<size; i++ )
+ charmap_t *charmap = __gg__get_charmap(DEFAULT_32_ENCODING);
+ cbl_char_t space = charmap->mapped_character(ascii_space);
+ size_t nbytes_converted;
+ const char *converted = __gg__iconverter(field->encoding,
+ DEFAULT_32_ENCODING,
+ field->data+offset,
+ size,
+ &nbytes_converted);
+ size_t i=0;
+ while( i < nbytes_converted )
{
- unsigned char ch = bytes[i];
- if( (ch < mapped_0)
- || (ch > mapped_9) )
+ cbl_char_t ch = charmap->getch(converted, &i);
+ if( !checker(ch) && ch != space )
{
retval = 0;
break;
size_t size)
{
// The default answer is TRUE
- int retval = 1;
-
- const unsigned char *alpha = reinterpret_cast<unsigned char *>(field->data+offset);
-
- size_t str_length = size;
-
- const unsigned char *omega = alpha + str_length;
+ int retval;
- if(alpha >= omega)
+ if( size == 0 )
{
- // If there is nothing there, then it can't be TRUE. Can it?
+ // If there is nothing there, then it can't be TRUE.
retval = 0;
}
-
- unsigned char ch;
- switch(type)
+ else
{
- case ClassNumericType:
+ switch(type)
{
- switch( field->type )
- {
- case FldNumericEdited:
- retval = is_numeric_edited_numeric(field, offset, size);
- break;
- case FldNumericDisplay:
- retval = is_numeric_display_numeric(field, offset, size);
- break;
- case FldPacked:
- retval = is_packed_numeric(field, offset, size);
- break;
- case FldGroup:
- case FldAlphanumeric:
- case FldAlphaEdited:
- retval = is_alpha_a_number(field, offset, size);
- break;
-
- case FldNumericBinary:
- case FldNumericBin5:
- // These need to checked for fitting into field->digits
- break;
-
- default:
- fprintf(stderr,
- "We need code for %s numeric type %d\n",
- field->name,
- field->type);
- abort();
- break;
- }
+ case ClassNumericType:
+ retval = classify_numeric_type(field, offset, size);
+ break;
- break;
- }
+ case ClassAlphabeticType:
+ retval = classify_alphabetic_type(field, offset, size, std::iswalpha);
+ break;
- case ClassAlphabeticType:
- {
- charmap_t *charmap = __gg__get_charmap(field->encoding);
- int mapped_space = charmap->mapped_character(ascii_space);
- int mapped_A = charmap->mapped_character(ascii_A);
- int mapped_I = charmap->mapped_character(ascii_I);
- int mapped_J = charmap->mapped_character(ascii_J);
- int mapped_R = charmap->mapped_character(ascii_R);
- int mapped_S = charmap->mapped_character(ascii_S);
- int mapped_Z = charmap->mapped_character(ascii_Z);
- int mapped_a = charmap->mapped_character(ascii_a);
- int mapped_i = charmap->mapped_character(ascii_i);
- int mapped_j = charmap->mapped_character(ascii_j);
- int mapped_r = charmap->mapped_character(ascii_r);
- int mapped_s = charmap->mapped_character(ascii_s);
- int mapped_z = charmap->mapped_character(ascii_z);
- while(alpha < omega)
- {
- ch = (*alpha++)&0xFF;
- if( ch == mapped_space )
- {
- continue;
- }
- // If necessary, this could be sped up with the creation of
- // appropriate mapping tables.
-
- // The oddball construction of this if() statement is a consequence of
- // EBCDIC. Because of peculiarities going all the back to the encoding
- // of characters on IBM cards, where it wasn't a good idea to have too
- // many consecutive punches in a column because it would weaken the card
- // to the point where its structural integrity might be threatened, the
- // coding for the letter of the alphabet are not contiguous.
- if(!( ( ch >= mapped_A && ch <= mapped_I)
- || (ch >= mapped_J && ch <= mapped_R)
- || (ch >= mapped_S && ch <= mapped_Z)
- || (ch >= mapped_a && ch <= mapped_i)
- || (ch >= mapped_j && ch <= mapped_r)
- || (ch >= mapped_s && ch <= mapped_z) ) )
- {
- // The character is not alphabetic
- retval = 0;
- break;
- }
- }
- break;
- }
+ case ClassLowerType:
+ retval = classify_alphabetic_type(field, offset, size, std::iswlower);
+ break;
- case ClassLowerType:
- {
- charmap_t *charmap = __gg__get_charmap(field->encoding);
- int mapped_space = charmap->mapped_character(ascii_space);
- int mapped_a = charmap->mapped_character(ascii_a);
- int mapped_i = charmap->mapped_character(ascii_i);
- int mapped_j = charmap->mapped_character(ascii_j);
- int mapped_r = charmap->mapped_character(ascii_r);
- int mapped_s = charmap->mapped_character(ascii_s);
- int mapped_z = charmap->mapped_character(ascii_z);
- while(alpha < omega)
- {
- ch = *alpha++;
- if( ch == mapped_space )
- {
- continue;
- }
- if(!( ( ch >= mapped_a && ch <= mapped_i)
- || (ch >= mapped_j && ch <= mapped_r)
- || (ch >= mapped_s && ch <= mapped_z) ) )
- {
- retval = 0;
- break;
- }
- }
- break;
- }
+ case ClassUpperType:
+ retval = classify_alphabetic_type(field, offset, size, std::iswupper);
+ break;
- case ClassUpperType:
- {
- charmap_t *charmap = __gg__get_charmap(field->encoding);
- int mapped_space = charmap->mapped_character(ascii_space);
- int mapped_A = charmap->mapped_character(ascii_A);
- int mapped_I = charmap->mapped_character(ascii_I);
- int mapped_J = charmap->mapped_character(ascii_J);
- int mapped_R = charmap->mapped_character(ascii_R);
- int mapped_S = charmap->mapped_character(ascii_S);
- int mapped_Z = charmap->mapped_character(ascii_Z);
- while(alpha < omega)
- {
- ch = *alpha++;
- if( ch == mapped_space )
- {
- continue;
- }
- if(!( ( ch >= mapped_A && ch <= mapped_I)
- || (ch >= mapped_J && ch <= mapped_R)
- || (ch >= mapped_S && ch <= mapped_Z) ) )
- {
- retval = 0;
- break;
- }
- }
- break;
+ case ClassInvalidType:
+ case ClassDbcsType:
+ case ClassKanjiType:
+ default:
+ warnx("%s(): Don't know how to handle %s",
+ __func__,
+ classify_str(type));
+ abort();
+ break;
}
-
- case ClassInvalidType:
- case ClassDbcsType:
- case ClassKanjiType:
- default:
- warnx("%s(): Don't know how to handle %s",
- __func__,
- classify_str(type));
- abort();
- break;
}
return retval;
cbl_encoding_t to )
{
// This does an in-place conversion of psz
+ charmap_t *charmap_from = __gg__get_charmap(from);
+ const charmap_t *charmap = __gg__get_charmap(to);
if( from > custom_encoding_e )
{
size_t charsout;
const char *converted = __gg__iconverter(from,
to,
psz,
- strlen(psz),
+ charmap_from->strlen(psz),
&charsout);
- strcpy(psz, converted);
+ // Copy over the converted string, including the final NUL
+ memcpy(psz, converted, charsout + charmap->stride());
}
}
cbl_encoding_t encoding)
{
int retval = 1; // 1 means we couldn't find it
+
if( psz_name )
{
- tgt_length = tgt_length ? tgt_length : tgt->capacity;
-
- // Pick up the environment variable name
- char *env = strdup(psz_name);
- massert(env);
-
+ charmap_t *charmap = __gg__get_charmap(encoding);
+ size_t psz_name_length = charmap->strlen(psz_name);
+
+ // convert psz_name to the console encoding:
+ size_t converted_length;
+ const char *converted = __gg__iconverter(encoding,
+ __gg__console_encoding,
+ psz_name,
+ psz_name_length,
+ &converted_length);
+ // Copy converted, because brute_force_trim uses charmap_t:
+ char *env = strdup(converted);
// Get rid of leading and trailing space characters:
- char *trimmed_env = brute_force_trim( env,
- encoding );
+ const char *trimmed_env = brute_force_trim( env,
+ __gg__console_encoding );
- // Convert the name to the console codeset:
- __gg__convert_encoding( trimmed_env,
- encoding,
- DEFAULT_SOURCE_ENCODING);
-
- // Pick up the environment variable, and convert it to the internal codeset
+ // Pick up the environment variable
const char *p = getenv(trimmed_env);
+ free(env);
if(p)
{
- retval = 0; // Okay
- move_string(tgt, tgt_offset, tgt_length, p, DEFAULT_SOURCE_ENCODING);
+ retval = 0; // We found the environment variable:
+ // Convert it to the target encoding:
+ converted = __gg__iconverter(__gg__console_encoding,
+ tgt->encoding,
+ p,
+ strlen(p),
+ &converted_length);
+ __gg__field_from_string(tgt, tgt_offset, tgt_length,
+ converted, converted_length);
+ }
+ else
+ {
+ // Leave the target unchanged, as per spec.
}
- free(env);
}
if( retval == 1 )
{
- // Could't find it
+ // Could't find that environment variable
exception_raise(ec_argument_imp_environment_e);
}
size_t name_offset,
size_t name_length)
{
- // We need the name to be nul-terminated:
- char *p = static_cast<char *>(malloc(name_length + 1));
+ // We need the name to be nul-terminated, so we will tack on four extra
+ // nulls to handle characters up to 32 bits wide
+ char *p = static_cast<char *>(malloc(name_length + width_of_utf32));
massert(p);
memcpy(p, name->data+name_offset, name_length);
+ memset(p + name_length, 0, width_of_utf32);
p[name_length] = '\0';
int retval = accept_envar(tgt,
tgt_offset,
command_line_plan_b();
char ach[128];
sprintf(ach, "%d", stashed_argc);
- move_string(dest, offset, length, ach, __gg__console_encoding);
+ size_t nbytes;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ dest->encoding,
+ ach,
+ strlen(ach),
+ &nbytes );
+ __gg__field_from_string(dest, offset, length, converted, nbytes);
+ __gg__adjust_dest_size(dest, nbytes);
+ free(converted);
}
extern "C"
}
else
{
- move_string(dest,
- dest_offset,
- dest_length,
- stashed_argv[N],
- DEFAULT_SOURCE_ENCODING);
+ size_t nbytes;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ dest->encoding,
+ stashed_argv[N],
+ strlen(stashed_argv[N]),
+ &nbytes );
+ __gg__field_from_string(dest, dest_offset, dest_length, converted, nbytes);
+ __gg__adjust_dest_size(dest, nbytes);
+ free(converted);
retcode = 0; // Okay
}
return retcode;
if( *retval )
{
flength = flength ? flength : field->capacity;
- move_string(field, offset, flength, retval, __gg__console_encoding);
+ size_t nbytes;
+ char *converted = __gg__miconverter(__gg__console_encoding,
+ field->encoding,
+ retval,
+ strlen(retval),
+ &nbytes );
+ __gg__field_from_string(field, offset, flength, converted, nbytes);
+ __gg__adjust_dest_size(field, nbytes);
+ free(converted);
retcode = 0; // Okay
}
else
cbl_figconst_t figconst = (cbl_figconst_t)figconst_;
- unsigned char special = charmap->mapped_character(ascii_space);
+ cbl_char_t special = charmap->mapped_character(ascii_space);
switch(figconst)
{
case space_value_e:
default:
break;
}
- memset( var->data, special, var->capacity);
+ charmap->memset( var->data, special, var->capacity);
}
extern "C"
extern "C"
int
-__gg__literaln_alpha_compare(const char *left_side,
+__gg__literaln_alpha_compare( char *left_side,
const cblc_field_t *right,
size_t offset,
size_t length,
length = right->capacity;
}
+ cbl_encoding_t right_encoding = right->encoding;
+ if( right->attr & hex_encoded_e )
+ {
+ right_encoding = iconv_CP1252_e;
+ }
retval = compare_strings( left_side,
strlen(left_side),
false,
reinterpret_cast<char *>((right->data + offset)),
length,
!!(flags & REFER_T_MOVE_ALL),
- right->encoding);
- return retval;
- }
-
-static char *
-string_in( char *str,
- const char *str_e,
- const char *frag,
- const char *frag_e)
- {
- // This simple routine could be improved. Instead of using memcmp, we could
- // use established, albeit complex, techniques of string searching:
-
- // Looking for "abcde" in "abcdabcde", for example. One could notice that
- // starting at the first 'a' results in a mismatch at the second 'a'. There
- // is thus no need to start the second search at the first 'b' in the searched
- // string; one could jump ahead to the second 'a' and continue from there.
-
- // Feel free. It won't matter in the real world; a program whose innermost
- // loop is an UNSTRING is difficult to imagine. But feel free.
-
- char *retval = NULL;
- size_t nchars = frag_e - frag;
- char *p = str;
- while( p + nchars <= str_e )
- {
- if( memcmp(p, frag, nchars) == 0 )
- {
- retval = p;
- break;
- }
- p += 1;
- }
+ right_encoding,
+ right_encoding);
return retval;
}
// Initialize the state variables
int overflow = 0;
int tally = 0;
- int pointer = 1;
+ size_t pointer = 1;
size_t nreceiver;
- char *left = NULL;
- char *right = NULL;
- int previous_delimiter;
+ size_t left;
+ size_t right;
+
+ std::u32string str_id1;
+ std::vector<std::u32string> delimiters;
+
+ const charmap_t *charmap_id1 = __gg__get_charmap(id1->encoding);
+ int stride_id1 = charmap_id1->stride();
if( id8 )
{
if( id7 )
{
int rdigits;
- pointer = (int)__gg__binary_value_from_qualified_field(&rdigits,
+ int p = (int)__gg__binary_value_from_qualified_field(&rdigits,
id7,
id7_o,
id7_s);
+ if( p < 1 )
+ {
+ overflow = 1;
+ goto done;
+ }
+ pointer = p;
}
// As per the spec, if the string is zero-length; we are done.
// As per the spec, we have an overflow condition if pointer is out of
// range:
- if( pointer < 1 || pointer > (int)id1_s )
+ if( pointer > id1_s/stride_id1 )
{
overflow = 1;
goto done;
}
+ // pointer is one-based throughout; don't forget that
- left = reinterpret_cast<char *>(id1->data+id1_o) + pointer-1;
- right = reinterpret_cast<char *>(id1->data+id1_o) + id1_s;
+ /* I thought long and hard about converting things to UTF32 for UNSTRING. It
+ was not obviously necessary. But, darn it all, sooner or later somebody
+ is going to demand UTF-8 capability and I can't think of any obvious way
+ of being able to handle multibyte codepoints as single characters without
+ doing something like converting to UTF32. */
+ str_id1 = normalize_for_inspect_format_4( id1,
+ id1_o,
+ id1_s,
+ id1->encoding);
+ left = pointer-1;
+ right = str_id1.size();
if( ndelimiteds == 0 )
{
// There are no DELIMITED BY identifier-2 values, so we just peel off
// characters from identifier-1 and put them into each identifier-4:
- for( size_t i=0; i<nreceivers; i++ )
+ for( size_t receiver=0; receiver<nreceivers; receiver++ )
{
if( left >= right )
{
+ // We have run out of input characters.
break;
}
- size_t id_4_size = id4_s[i];
- if( id4[i]->attr & separate_e )
+ // We will peel off enough characters to fit the receiving id4:
+ size_t id_4_size = id4_s[receiver]/stride_id1;
+ if( id4[receiver]->attr & separate_e )
{
- // The receiver is NumericDisplay with a separate signe
- id_4_size = id4_s[i] - 1;
+ // The receiver is NumericDisplay with a separate sign, so, as per
+ // the spec, we reduce the size by one character.
+ id_4_size = id4_s[receiver] - 1;
}
- // Make sure id_4_size doesn't move past the end of the universe
+ // Make sure id_4_size doesn't take us past the end of the universe
if( left + id_4_size > right )
{
id_4_size = right - left;
}
- // Move the data into place:
- move_string(id4[i],
- id4_o[i],
- id4_s[i],
- left,
- id1->encoding,
- id_4_size);
-
+ // Convert the specified str_id1 characters back to id1->encoding.
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter(DEFAULT_32_ENCODING,
+ id1->encoding,
+ &str_id1[left],
+ (right-left)*width_of_utf32,
+ &bytes_converted );
+ char *duped = static_cast<char *>(__gg__memdup(converted, bytes_converted));
+ // Put the converted string into place:
+ __gg__field_from_string(id4[receiver],
+ id4_o[receiver],
+ id4_s[receiver],
+ duped,
+ bytes_converted);
+ free(duped);
// Update the state variables:
left += id_4_size;
pointer += id_4_size;
// Arriving here means there is some number of ndelimiteds
+ // Convert them to the same encoding as str_id1:
+ for( size_t i=0; i<ndelimiteds; i++ )
+ {
+ std::u32string delimiter
+ = normalize_for_inspect_format_4(id2[i],
+ id2_o[i],
+ id2_s[i],
+ id1->encoding);
+ delimiters.push_back(delimiter);
+ }
+
nreceiver = 0;
- previous_delimiter = -1;
while( left < right )
{
- // Starting at 'left', see if we can find any of the delimiters
- char *leftmost_delimiter = NULL;
- int ifound = -1;
- cbl_figconst_t figconst;
- char achfigconst[1];
- cbl_encoding_t fig_encoding;
+ // Starting at 'left', see if we can find any of the delimiters. For each
+ // 'left' position, we look through all of the delimiters,
+
+ int best_delimiter = -1;
+ size_t best_leftmost = right; // This is the location of the start of ALL
+ size_t best_location = right; // This is the location of the last of ALL
for( size_t i=0; i<ndelimiteds; i++ )
{
- fig_encoding = id1->encoding;
- charmap_t *charmap = __gg__get_charmap(fig_encoding);
- char *pfound;
- figconst = (cbl_figconst_t)(id2[i]->attr & FIGCONST_MASK);
-
- switch(figconst)
- {
- case low_value_e :
- achfigconst[0] = charmap->figconst_character(figconst);
- pfound = string_in( left,
- right,
- achfigconst,
- achfigconst+1);
- break;
-
- case zero_value_e :
- achfigconst[0] = charmap->figconst_character(figconst);
- pfound = string_in( left,
- right,
- achfigconst,
- achfigconst+1);
- break;
-
- case space_value_e :
- achfigconst[0] = charmap->figconst_character(figconst);
- pfound = string_in( left,
- right,
- achfigconst,
- achfigconst+1);
- break;
-
- case quote_value_e :
- achfigconst[0] = charmap->figconst_character(figconst);
- pfound = string_in( left,
- right,
- achfigconst,
- achfigconst+1);
- break;
-
- case high_value_e :
- achfigconst[0] = charmap->figconst_character(figconst);
- pfound = string_in( left,
- right,
- achfigconst,
- achfigconst+1);
- break;
-
- case normal_value_e :
- default:
- pfound = string_in( left,
- right,
- reinterpret_cast<char *>(id2[i]->data+id2_o[i]),
- reinterpret_cast<char *>((id2[i]->data+id2_o[i])
- + id2_s[i]));
- break;
- }
-
- if( pfound )
+ std::u32string str_id2 = delimiters[i];
+ size_t nfound = str_id1.find(str_id2, left);
+ if( nfound != std::u32string::npos )
{
// We found a delimiter
- if( !leftmost_delimiter || pfound < leftmost_delimiter )
+ if( nfound > best_leftmost )
{
- ifound = i;
- leftmost_delimiter = pfound;
+ // This delimiter lives to the right of the best one we found so far.
+ // Ignore it, and proceed to the next delimiter.
+ continue;
}
- }
- }
+ // This delimiter is the leftmost we've seen so far:
+ best_delimiter = i;
+ best_leftmost = nfound;
+ best_location = nfound;
- if( ifound >= 0
- && leftmost_delimiter == left
- && ifound == previous_delimiter )
- {
- // We found another instance of an ALL delimiter.
- // So, we just skip it.
- left += id2_s[previous_delimiter];
- pointer += id2_s[previous_delimiter];
- continue;
+ if( all_flags[i] == ascii_1 )
+ {
+ // This delimiter is flagged as ALL, so we need to see if we have
+ // a flock of them:
+ size_t next = nfound + str_id2.size() ;
+ while( str_id1.find(str_id2, next ) == next )
+ {
+ // We found another consecutive one at next:
+ best_location = next;
+ next += str_id2.size();
+ }
+ }
+ }
}
- // We did not re-find an ALL DELIMITER
- previous_delimiter = -1;
-
// If we've used up all receivers, we bail at this point
if( nreceiver >= nreceivers )
{
break;
}
- if( ifound >= 0 && all_flags[ifound] == ascii_1 )
- {
- // Arriving here means we found a new delimiter.
- // If the ALL flag was on, set up to notice repeats
- previous_delimiter = ifound;
- }
-
- if( !leftmost_delimiter )
+ if( best_delimiter == -1 )
{
// We were unable to find a delimiter, so we eat up the remainder
// of the sender:
- leftmost_delimiter = right;
+ best_leftmost = right;
+ best_location = right;
}
// Apply what we have learned to the next receiver:
- size_t examined = leftmost_delimiter - left;
-
- // Move the data into place:
- move_string(id4[nreceiver],
- id4_o[nreceiver],
- id4_s[nreceiver],
- left,
- id1->encoding,
- examined);
-
- // Update the left pointer
- left = leftmost_delimiter;
- if( ifound >= 0 )
- {
- // And skip over the delimiter
- left += id2_s[ifound];
- }
-
+ size_t examined = best_leftmost - left;
+
+ // Convert the data from left to leftmost_delimiter back to encoding of
+ // id1:
+ size_t bytes_converted;
+ const char *converted = __gg__iconverter(
+ DEFAULT_32_ENCODING,
+ id1->encoding,
+ &str_id1[left],
+ (best_leftmost-left)*width_of_utf32,
+ &bytes_converted );
+ char *duped = static_cast<char *>(__gg__memdup(converted, bytes_converted));
+ // Put the converted string into place:
+ __gg__field_from_string(id4[nreceiver],
+ id4_o[nreceiver],
+ id4_s[nreceiver],
+ duped,
+ bytes_converted);
+ free(duped);
+ // Update the left edge
+ left = best_location + (best_delimiter > -1
+ ? delimiters[best_delimiter].size()
+ : 0) ;
if( id5[nreceiver] )
{
- if( ifound >= 0 )
- {
- if( figconst )
- {
- move_string(id5[nreceiver],
- id5_o[nreceiver],
- id5_s[nreceiver],
- achfigconst,
- fig_encoding,
- 1);
- }
- else
- {
- move_string(id5[nreceiver],
- id5_o[nreceiver],
- id5_s[nreceiver],
- reinterpret_cast<char *>(id2[ifound]->data+id2_o[ifound]),
- id2[ifound]->encoding,
- id2_s[ifound]);
- }
+ // The caller wants to know what the delimiter was:
+ if( best_delimiter > -1 )
+ {
+ converted = __gg__iconverter(
+ DEFAULT_32_ENCODING,
+ id1->encoding,
+ delimiters[best_delimiter].data(),
+ delimiters[best_delimiter].size()*width_of_utf32,
+ &bytes_converted );
+ duped = static_cast<char *>(__gg__memdup(converted, bytes_converted));
+ __gg__field_from_string(id5[nreceiver],
+ id5_o[nreceiver],
+ id5_s[nreceiver],
+ duped,
+ bytes_converted);
+ free(duped);
}
else
{
- move_string(id5[nreceiver],
- id5_o[nreceiver],
- id5_s[nreceiver],
- "",
- DEFAULT_SOURCE_ENCODING);
+ // We didn't find a delimiter
+ __gg__field_from_string(id5[nreceiver],
+ id5_o[nreceiver],
+ id5_s[nreceiver],
+ "",
+ 0);
}
}
// Update the state variables:
tally += 1;
nreceiver += 1;
- if( ifound >= 0 )
+ if( best_delimiter > -1 )
{
- pointer += examined + id2_s[ifound];
+ pointer = left+1 ;
}
}
return (__int128)fvalue;
}
-
extern "C"
void
__gg__adjust_dest_size(cblc_field_t *dest, size_t ncount)
{
if( dest->allocated < ncount )
{
- fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): Adjusting size upward is not possible.\n");
+ fprintf(stderr, "libgcobol.cc:__gg__adjust_dest_size(): "
+ "Adjusting %s size upward is not possible.\n",
+ dest->name);
abort();
// dest->allocated = ncount;
// dest->data = (unsigned char *)realloc(dest->data, ncount);
// We need ach_name to be in ASCII:
size_t charsout;
const char *converted = __gg__iconverter(field->encoding,
- DEFAULT_SOURCE_ENCODING,
+ __gg__console_encoding,
PTRCAST(char, field->data),
length,
&charsout);
length,
&charsout);
memcpy(ach_name, converted, length);
+ char *p = strchr(ach_name, ascii_space);
+ if(p)
+ {
+ *p = '\0';
+ }
+ length = strlen(ach_name);
// At this point we have a null-terminated ascii function name.
{
// Get the encoded character associated with the figconst
retval = __gg__fc_char(field);
- if(retval == -1)
+ if(retval == NOT_A_CHARACTER)
{
retval = (int)(unsigned char)__gg__get_integer_binary_value(field);
}
{
// This returns the figconst character for a field, if the field->attr
// indicates that the field is a figconst. Otherwise, it comes back -1
- int retval = -1;
+ int retval = NOT_A_CHARACTER;
charmap_t *charmap = __gg__get_charmap(field->encoding);
cbl_figconst_t figconst = (cbl_figconst_t)(field->attr & FIGCONST_MASK);
- retval = charmap->figconst_character(figconst);
+ if( figconst )
+ {
+ retval = charmap->figconst_character(figconst);
+ }
+ return retval;
+ }
+
+extern "C"
+void
+__gg__refer_from_string(cblc_field_t *field,
+ size_t field_offset,
+ size_t field_size,
+ const char *string)
+ {
+ // 'string' has to be in the 'field' encoding. Use this when the input
+ // might, or might not, be nul-terminated, and you don't want a
+ // nul-terminator in the data of the target field.
+ charmap_t *charmap = __gg__get_charmap(field->encoding);
+ size_t nbytes = charmap->strlen(string, field_size);
+ __gg__field_from_string(field, field_offset, field_size, string, nbytes);
+ }
+
+extern "C"
+void
+__gg__refer_from_psz(cblc_field_t *field,
+ size_t field_offset,
+ size_t field_size,
+ const char *string)
+ {
+ // 'string' has to be in the 'field' encoding. Use this when the input
+ // might, or might not, be nul-terminated, and you *do* want a
+ // nul-terminator in the data of the target field if there was one in the
+ // input.
+
+ // One typical use is processing returned values from external C-style
+ // functions, which often return nul-terminated strings.
+ charmap_t *charmap = __gg__get_charmap(field->encoding);
+ size_t nbytes = charmap->strlen(string, field_size);
+ __gg__field_from_string(field,
+ field_offset,
+ field_size,
+ string,
+ nbytes);
+ }
+
+
+extern "C"
+void
+__gg__find_string( cblc_field_t *dest,
+ const cblc_field_t *haystack,
+ size_t haystack_o,
+ size_t haystack_s,
+ const cblc_field_t *needle,
+ size_t needle_o,
+ size_t needle_s,
+ const cblc_field_t *after,
+ size_t after_o,
+ size_t after_s,
+ bool last,
+ bool anycase)
+ {
+ int retval = 0;
+ cbl_encoding_t encoding = dest->encoding;
+ std::u32string str_id1 = normalize_for_inspect_format_4(
+ haystack,
+ haystack_o,
+ haystack_s,
+ encoding);
+ std::u32string str_id2 = normalize_for_inspect_format_4(
+ needle,
+ needle_o,
+ needle_s,
+ encoding);
+ if( !str_id1.empty() && !str_id2.empty() )
+ {
+ if( anycase )
+ {
+ std::transform( str_id1.begin(),
+ str_id1.end(),
+ str_id1.begin(),
+ std::towlower);
+ std::transform( str_id2.begin(),
+ str_id2.end(),
+ str_id2.begin(),
+ std::towlower);
+ }
+
+ // This is the count of how many to skip before returning an answer:
+ int after_count = 1;
+ size_t search_position;
+ if( last )
+ {
+ // We will search from right to left:
+ search_position = str_id1.size();
+ }
+ else
+ {
+ // We will search from left to right:
+ search_position = 0;
+ }
+
+ if( after )
+ {
+ int rdigits;
+ after_count = static_cast<int>(get_binary_value_local(&rdigits,
+ after,
+ after->data+after_o,
+ after_s));
+ after_count += 1;
+ }
+ while( after_count-- >= 1 )
+ {
+ if( !last )
+ {
+ // We are searching from left to right
+ search_position = str_id1.find(str_id2, search_position);
+ if( search_position == std::u32string::npos )
+ {
+ // Alas, our search was fruitless
+ retval = 0;
+ break;
+ }
+ search_position += 1;
+ if( after_count == 0 )
+ {
+ // This was the find we were looking for!
+ // COBOL positions are 1-based positions, not zero-based offsets:
+ retval = search_position;
+ break;
+ }
+ }
+ else
+ {
+ // We are searching from right_to)left
+ search_position = str_id1.rfind(str_id2, search_position);
+ if( search_position == std::u32string::npos )
+ {
+ // Alas, our search was fruitless
+ break;
+ }
+ if( after_count == 0 )
+ {
+ // This was the find we were looking for!
+ // COBOL positions are 1-based positions, not zero-based offsets:
+ retval = search_position + 1;
+ break;
+ }
+ if( search_position == 0)
+ {
+ // There's no point in continuing the search leftwardsspast the
+ // left edge, and if we subtract 1 from the size_t search_position,
+ // we are not going to be happy with the result.
+ break;
+ }
+ search_position -= 1;
+ }
+ }
+ }
+ // Set the return value:
+ __gg__int128_to_field(dest,
+ retval,
+ NO_RDIGITS,
+ truncation_e,
+ NULL);
+ }
+
+static
+char *
+convert_for_convert( cbl_encoding_t dest_enc,
+ const cblc_field_t *input,
+ size_t input_o,
+ size_t input_s,
+ size_t *nbytes)
+ {
+ // iconverter takes care of untranslateable characters.
+ char *retval = __gg__miconverter(input->encoding,
+ dest_enc,
+ input->data + input_o,
+ input_s,
+ nbytes);
return retval;
}
+
+extern "C"
+void
+__gg__convert(cblc_field_t *dest,
+ const cblc_field_t *input,
+ size_t input_o,
+ size_t input_s,
+ int /*source_format*/,
+ int dest_format)
+ {
+ /* convert formulations:
+ * 1. ANY to ALNUM HEX, or NAT HEX
+ * 2. HEX to BYTE
+ * 3. ALNUM to NAT, ALNUM HEX, or NAT HEX
+ * 4. NAT to ALNUM, ALNUM HEX, or NAT HEX
+ */
+
+ /* enum convert_type_t
+ *convert_alpha_e = 0x01,
+ *convert_nat_e = 0x02,
+ *convert_any_e = 0x03, // i.e., both
+ *convert_byte_e = 0x04,
+ *convert_hex_e = 0x08, // may be combined with alpha or national
+ *convert_just_bit_e = 0x10,
+ *convert_just_e = 0x18, // combined with HEX
+ *convert_rjust_bit_e = 0x20,
+ *convert_rjust_e = 0x38, // combined with JUSTIFY
+ */
+ cbl_encoding_t tgt_enc = (dest_format & convert_nat_e)
+ ? __gg__national_encoding
+ : __gg__display_encoding;
+ const charmap_t *charmap_tgt = __gg__get_charmap(tgt_enc);
+
+ charmap_t *charmap_dest = __gg__get_charmap(dest->encoding);
+
+ if( dest_format & convert_hex_e )
+ {
+ size_t nbytes;
+ char *converted = convert_for_convert(tgt_enc,
+ input,
+ input_o,
+ input_s,
+ &nbytes);
+ // We output 'converted' as a stream of hexadecimal characters in the
+ // destination encoding:
+ size_t i = 0;
+ size_t d = 0;
+ while(i < nbytes && d < dest->capacity )
+ {
+ cbl_char_t byte = charmap_tgt->getch(converted, &i);
+ unsigned char hi = byte>>4;
+ hi += hi < 10 ? ascii_0 : ascii_A-10;
+ char lo = byte & 0x0F;
+ lo += lo < 10 ? ascii_0 : ascii_A-10;
+ charmap_dest->putch(charmap_dest->mapped_character(hi), dest->data, &d);
+ charmap_dest->putch(charmap_dest->mapped_character(lo), dest->data, &d);
+ }
+ free(converted);
+ __gg__adjust_dest_size(dest, d);
+ }
+ else if( dest_format == convert_byte_e )
+ {
+ // The input is a series of hexadecimal characters
+ size_t nbytes;
+ char *converted = __gg__miconverter( input->encoding,
+ iconv_CP1252_e,
+ input->data+input_o,
+ input_s,
+ &nbytes);
+ size_t i = 0;
+ size_t d = 0;
+ while(i < nbytes && d < dest->capacity )
+ {
+ // Each character is part of a string of hexadecimal digits. So, the
+ // idea is that A1 should be turned into 1010.0001. There is no
+ // guarantee that these characters actually are hexadecimal.
+ cbl_char_t nybble = charmap_tgt->getch(converted, &i);
+ if( nybble >= ascii_0 && nybble <= ascii_9 )
+ {
+ nybble -= ascii_0;
+ }
+ else if( nybble >= ascii_a && nybble <= ascii_f )
+ {
+ nybble -= ascii_a - 10;
+ }
+ else if( nybble >= ascii_A && nybble <= ascii_F )
+ {
+ nybble -= ascii_A - 10;
+ }
+ else
+ {
+ nybble = 0;
+ }
+
+ for(int j=0; j<4; j++)
+ {
+ if( nybble & 0x08 )
+ {
+ charmap_dest->putch(ascii_1, dest->data, &d);
+ }
+ else
+ {
+ charmap_dest->putch(ascii_0, dest->data, &d);
+ }
+ nybble <<= 1;
+ }
+ }
+ free(converted);
+ __gg__adjust_dest_size(dest, d);
+ }
+ else
+ {
+ size_t nbytes;
+ char *converted = convert_for_convert(dest->encoding,
+ input,
+ input_o,
+ input_s,
+ &nbytes);
+ size_t len = std::min(nbytes, dest->capacity);
+ memcpy(dest->data, converted, len);
+ free(converted);
+ __gg__adjust_dest_size(dest, len);
+ }
+ }
extern "C" __int128 __gg__binary_value_from_field( int *rdigits,
cblc_field_t *var);
-extern "C" int __gg__compare_2( cblc_field_t *left_side,
- unsigned char *left_location,
- size_t left_length,
- int left_attr,
- int left_flags,
- cblc_field_t *right_side,
- unsigned char *right_location,
- size_t right_length,
- int right_attr,
- int right_flags,
- int second_time_through);
+extern "C" int __gg__compare_2( cblc_field_t *left_side,
+ unsigned char *left_location,
+ size_t left_length,
+ uint64_t left_attr,
+ int left_flags,
+ cblc_field_t *right_side,
+ unsigned char *right_location,
+ size_t right_length,
+ uint64_t right_attr,
+ int right_flags,
+ int second_time_through);
extern "C" void __gg__int128_to_field(cblc_field_t *tgt,
__int128 value,
int source_rdigits,
const unsigned short *__gg__current_collation();
+// Warning: field_from_string uses charmap_t, so you can't safely feed it
+// the results of __gg__iconverter without copying them.
+extern "C"
+void __gg__field_from_string( cblc_field_t *field,
+ size_t field_o,
+ size_t field_s,
+ const char *string,
+ size_t string_length);
+extern "C"
+void *__gg__memdup(const void *p, size_t size);
+
+enum {width_of_utf32 = 4};
+
#endif
+.SUFFIXES: .scr .cbl
+
+ROOT = $(shell git rev-parse --show-toplevel)
+
#
# Demonstrate how to generate a new COBOL binding from a man page.
#
+SCRAPE = $(ROOT)/libgcobol/posix/bin/scrape.awk
+UDF.GEN = $(ROOT)/libgcobol/posix/bin/udf-gen
+
+posix-funcs:
+ test "$(FUNCS)"
+ for F in $(FUNCS); \
+ do man 2 $$F | col -b | $(SCRAPE) > posix-$$F.scr; \
+ $(MAKE) -f $(ROOT)/libgcobol/posix/bin/Makefile posix-$${F}.cbl; done
+
+posix-$(FUNC).cbl:
+ man 2 $(FUNC) | col -b | $(SCRAPE) | \
+ $(UDF.GEN) -D mode_t=unsigned\ long > $@~
+ @mv $@~ $@
+
posix-mkdir.cbl:
- man 2 mkdir | ./scrape.awk | \
- ../udf-gen -D mode_t=unsigned\ long > $@~
+ man 2 mkdir | col -b | $(SCRAPE) | \
+ $(UDF.GEN) -D mode_t=unsigned\ long > $@~
@mv $@~ $@
# ... or
posix-stat-many.scr:
- man 2 stat | col -b | ./scrape.awk > $@~
+ man 2 stat | col -b | $(SCRAPE) > $@~
@mv $@~ $@
-.scr.cbl:
- ./udf-gen -D mode_t=unsigned\ long $^ > $@~
+%.cbl : %.scr
+ test -s $^
+ $(UDF.GEN) $(CPPFLAGS) $^ > $@~
@mv $@~ $@
exit
}
+# Print lines that end in dots, a comma, a brace, or a semicolon.
/SYNOPSIS/,/DESCRIPTION/ {
if( /([.][.]|[{},;]) *$/ ) {
print
import sys, os, getopt, re, copy
from pycparser import c_parser, c_generator, c_ast, parse_file
+from pycparser.plyparser import ParseError
def starify(param):
stars = ""
cpp_args = ''
def process(srcfile):
- ast = parse_file(srcfile, use_cpp=True, cpp_args=cpp_args)
+ try:
+ ast = parse_file(srcfile, use_cpp=True, cpp_args=cpp_args)
+ except ParseError as oops:
+ print(oops, file=sys.stderr)
+ sys.exit(1)
# print(c_generator.CGenerator().visit(ast))
v = VisitPrototypes()
v.visit(ast)
--- /dev/null
+ >> PUSH source format
+ >>SOURCE format is fixed
+
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * This file is in the public domain.
+ * Contributed by James K. Lowden of Cobolworx in November 2025.
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+ >>DEFINE SEEK_SET AS 2
+ >>DEFINE SEEK_CUR AS 4
+ >>DEFINE SEEK_END AS 8
+
+ >> POP source format
+
--- /dev/null
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cassert>
+#include <map>
+
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+
+extern "C" {
+
+off_t
+posix_lseek(int fd, off_t offset, int whence) {
+
+ static const std::map<int, int> whences {
+ { 2, SEEK_SET },
+ { 4, SEEK_CUR },
+ { 8, SEEK_END },
+ };
+
+ /*
+ * Map valid input whence value onto C standard library value.
+ * Invalid values are passed through and rejected by lseek(2) per its documentation.
+ * (The caller always needs to check for errors anyway.)
+ */
+ auto p = whences.find(whence);
+ if( p != whences.end() ) whence = p.second;
+
+ return lseek(fd, offset, whence);
+}
+
+} // extern "C"
01 Lk-fd PIC 9(8) Usage COMP.
01 Lk-offset Binary-Long.
01 Lk-whence Binary-Long.
+ 88 SEEK-SET VALUE 2.
+ 88 SEEK-CUR VALUE 4.
+ 88 SEEK-END VALUE 8.
Procedure Division using
By Value Lk-fd,
By Value Lk-offset,
By Value Lk-whence
Returning Return-Value.
- Call "lseek" using
+ Call "posix_lseek" using
By Value Lk-fd,
By Value Lk-offset,
By Value Lk-whence
Returning Return-Value.
Move Lk-pathname To Ws-pathname.
- Inspect Ws-pathname
- Replacing Trailing Space By Low-Value
+ D Inspect Ws-pathname
+ D Replacing Trailing Space By Low-Value
Inspect Backward Ws-pathname Replacing Leading Space,
- - By Low-Value.
+ By Low-Value.
Call "unlink" using
By Reference Ws-pathname,
Returning Return-Value.
bool
__gg__binary_to_string_encoded( char *result,
- int digits,
+ size_t digits,
__int128 value,
cbl_encoding_t encoding)
{
- charmap_t *charmap = __gg__get_charmap(encoding);
- zero_char = charmap->mapped_character(ascii_0);
+ // A non-zero retval means the number was too big to fit into the desired
+ // number of digits.
+
+ zero_char = ascii_0;
// Note that this routine does not terminate the generated string with a
// NUL. This routine is sometimes used to generate a NumericDisplay string
value = -value;
}
- // A non-zero retval means the number was too big to fit into the desired
- // number of digits:
bool retval = !!(value / mask);
// mask off the bottom digits to avoid garbage when value is too large
combined.run = digits;
combined.val128 = value;
string_from_combined(combined);
- memcpy(result, combined_string, digits);
+ size_t converted_bytes;
+ const char *converted = __gg__iconverter(DEFAULT_SOURCE_ENCODING,
+ encoding,
+ combined_string,
+ digits,
+ &converted_bytes);
+ memcpy(result, converted, converted_bytes);
return retval;
}
extern "C"
__int128
__gg__numeric_display_to_binary(unsigned char *signp,
- const unsigned char *psz,
- int n,
+ const unsigned char *pdigits,
+ int ndigits,
cbl_encoding_t encoding)
{
/* This is specific to numeric display values.
/* We are assuming that 64-bit arithmetic is faster than 128-bit arithmetic,
and so we build up a 128-bit result in three 64-bit pieces, and assemble
them at the end. */
+ size_t digit_index = 0;
+ cbl_char_t ch;
charmap_t *charmap = __gg__get_charmap(encoding);
- unsigned char zero = charmap->mapped_character(ascii_0);
- unsigned char minus = charmap->mapped_character(ascii_minus);
+ cbl_char_t minus = charmap->mapped_character(ascii_minus);
- bool is_ebcdic = (zero == 0xF0);
+ bool is_ebcdic = charmap->is_like_ebcdic();
static const uint8_t lookup[] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x50
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x70
+ 0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, // 0x70
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xa0
bool is_negative = false;
// Pick up the original sign byte:
- unsigned char sign_byte = *signp;
+ cbl_char_t sign_byte = charmap->getch(signp, (size_t)0);
const unsigned char *mapper;
if( is_ebcdic )
// forcing the zone to 0xF0. Note that this is harmless if redundant, and
// harmless as well if the data SIGN IS SEPARATE. Whatever we do to this
// byte will be undone at the end of the routine.
- *signp |= 0xF0;
+ charmap->putch(sign_byte|0xF0, signp, (size_t)0);
}
else
{
is_negative = true;
// Make it a valid positive digit by turning the zone to 0x30
- *signp &= 0x3F;
+ charmap->putch(sign_byte&0x3F, signp, (size_t)0);
}
}
// Digits 1 through 18 come from the bottom:
- if( n <= 18 )
+ if( ndigits <= 18 )
{
- count_bottom = n;
+ count_bottom = ndigits;
count_middle = 0;
count_top = 0;
}
- else if( n<= 36 )
+ else if( ndigits<= 36 )
{
count_bottom = 18;
- count_middle = n - 18;
+ count_middle = ndigits - 18;
count_top = 0;
}
else
{
count_bottom = 18;
count_middle = 18;
- count_top = n - 36;
+ count_top = ndigits - 36;
}
- if( n & 1 )
+ if( ndigits & 1 )
{
// We are dealing with an odd number of digits
if( count_top )
{
- top = mapper[*psz++];
+ ch = charmap->getch(pdigits, &digit_index);
+ top = mapper[ch];
count_top -= 1;
}
else if( count_middle )
{
- middle = mapper[*psz++];
+ ch = charmap->getch(pdigits, &digit_index);
+ middle = mapper[ch];
count_middle -= 1;
}
else
{
- bottom = mapper[*psz++];
+ ch = charmap->getch(pdigits, &digit_index);
+ bottom = mapper[ch];
count_bottom -= 1;
}
}
while( count_top )
{
- add_me = mapper[*psz++] << 4;
- add_me += mapper[*psz++];
+ ch = charmap->getch(pdigits, &digit_index);
+ add_me = mapper[ch] << 4;
+ ch = charmap->getch(pdigits, &digit_index);
+ add_me += mapper[ch];
top *= 100 ;
top += lookup[add_me];
count_top -= 2;
while( count_middle )
{
- add_me = mapper[*psz++] << 4;
- add_me += mapper[*psz++];
+ ch = charmap->getch(pdigits, &digit_index);
+ add_me = mapper[ch] << 4;
+ ch = charmap->getch(pdigits, &digit_index);
+ add_me += mapper[ch];
middle *= 100 ;
middle += lookup[add_me];
count_middle -= 2;
while( count_bottom )
{
- add_me = mapper[*psz++] << 4;
- add_me += mapper[*psz++];
+ ch = charmap->getch(pdigits, &digit_index);
+ add_me = mapper[ch] << 4;
+ ch = charmap->getch(pdigits, &digit_index);
+ add_me += mapper[ch];
bottom *= 100 ;
bottom += lookup[add_me];
count_bottom -= 2;
}
// Replace the original sign byte:
- *signp = sign_byte; // cppcheck-suppress redundantAssignment
+ charmap->putch(sign_byte, signp, (size_t)0);
return retval;
}
__int128 value);
extern "C"
bool __gg__binary_to_string_encoded(char *result,
- int digits,
+ size_t digits, // Desired digits
__int128 value,
cbl_encoding_t encoding);
// We need to expand the picture string. We assume that the caller left
// enough room in dest to take the expanded picture string.
+ // Note that we do not put on a nul terminator, so if you need one, it's
+ // your job to put it there.
+
int dlength = expand_picture(dest, picture);
// At the present time, I am taking a liberty. In principle, a 'V'