This patch includes the implementation, documentation, and test case for SPLIT.
gcc/fortran/ChangeLog:
* check.cc (gfc_check_split): Argument check for SPLIT.
* gfortran.h (enum gfc_isym_id): Define GFC_ISYM_SPLIT.
* intrinsic.cc (add_subroutines): Register SPLIT intrinsic.
* intrinsic.h (gfc_check_split): New decl.
(gfc_resolve_split): Ditto.
* intrinsic.texi: SPLIT documentation.
* iresolve.cc (gfc_resolve_split): Add resolved_sym for SPLIT.
* trans-decl.cc (gfc_build_intrinsic_function_decls): Add decl for
SPLIT in libgfortran.
* trans-intrinsic.cc (conv_intrinsic_split): SPLIT codegen.
(gfc_conv_intrinsic_subroutine): Handle SPLIT case.
* trans.h (GTY): Declare gfor_fndecl_string_split{, _char4}.
libgfortran/ChangeLog:
* gfortran.map: Add split symbol.
* intrinsics/string_intrinsics_inc.c (string_split):
Runtime support for SPLIT.
gcc/testsuite/ChangeLog:
* gfortran.dg/split_1.f90: New test.
* gfortran.dg/split_2.f90: New test.
* gfortran.dg/split_3.f90: New test.
* gfortran.dg/split_4.f90: New test.
Signed-off-by: Yuao Ma <c8ef@outlook.com>
return true;
}
+bool
+gfc_check_split (gfc_expr *string, gfc_expr *set, gfc_expr *pos, gfc_expr *back)
+{
+ if (!type_check (string, 0, BT_CHARACTER))
+ return false;
+
+ if (!type_check (set, 1, BT_CHARACTER))
+ return false;
+
+ if (!type_check (pos, 2, BT_INTEGER) || !scalar_check (pos, 2))
+ return false;
+
+ if (back != NULL
+ && (!type_check (back, 3, BT_LOGICAL) || !scalar_check (back, 3)))
+ return false;
+
+ if (!same_type_check (string, 0, set, 1))
+ return false;
+
+ return true;
+}
bool
gfc_check_secnds (gfc_expr *r)
GFC_ISYM_COSPI,
GFC_ISYM_SINPI,
GFC_ISYM_TANPI,
+
+ GFC_ISYM_SPLIT,
};
enum init_local_logical
pt, BT_INTEGER, di, OPTIONAL, INTENT_IN,
gt, BT_INTEGER, di, OPTIONAL, INTENT_OUT);
+ add_sym_4s ("split", GFC_ISYM_SPLIT, CLASS_PURE,
+ BT_UNKNOWN, 0, GFC_STD_F2023,
+ gfc_check_split, NULL, gfc_resolve_split,
+ "string", BT_CHARACTER, dc, REQUIRED, INTENT_IN,
+ "set", BT_CHARACTER, dc, REQUIRED, INTENT_IN,
+ "pos", BT_INTEGER, di, REQUIRED, INTENT_INOUT,
+ "back", BT_LOGICAL, dl, OPTIONAL, INTENT_IN);
+
/* The following subroutines are part of ISO_C_BINDING. */
add_sym_3s ("c_f_pointer", GFC_ISYM_C_F_POINTER, CLASS_IMPURE, BT_UNKNOWN, 0,
bool gfc_check_random_init (gfc_expr *, gfc_expr *);
bool gfc_check_random_number (gfc_expr *);
bool gfc_check_random_seed (gfc_expr *, gfc_expr *, gfc_expr *);
+bool gfc_check_split (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *);
bool gfc_check_dtime_etime_sub (gfc_expr *, gfc_expr *);
bool gfc_check_fgetputc_sub (gfc_expr *, gfc_expr *, gfc_expr *);
bool gfc_check_fgetput_sub (gfc_expr *, gfc_expr *);
void gfc_resolve_symlnk_sub (gfc_code *);
void gfc_resolve_signal_sub (gfc_code *);
void gfc_resolve_sleep_sub (gfc_code *);
+void gfc_resolve_split (gfc_code *);
void gfc_resolve_stat_sub (gfc_code *);
void gfc_resolve_system_clock (gfc_code *);
void gfc_resolve_system_sub (gfc_code *);
* @code{SIZEOF}: SIZEOF, Determine the size in bytes of an expression
* @code{SLEEP}: SLEEP, Sleep for the specified number of seconds
* @code{SPACING}: SPACING, Smallest distance between two numbers of a given type
+* @code{SPLIT}: SPLIT, Parse a string into tokens, one at a time.
* @code{SPREAD}: SPREAD, Add a dimension to an array
* @code{SQRT}: SQRT, Square-root function
* @code{SRAND}: SRAND, Reinitialize the random number generator
+@node SPLIT
+@section @code{SPLIT} --- Parse a string into tokens, one at a time
+@fnindex SPLIT
+@cindex string, split
+
+@table @asis
+@item @emph{Synopsis}:
+@code{RESULT = SPLIT(STRING, SET, POS [, BACK])}
+
+@item @emph{Description}:
+Updates the integer @var{POS} to the position of the next (or previous)
+separator in @var{STRING}.
+
+If @var{BACK} is absent or is present with the value false, @var{POS} is
+assigned the position of the leftmost token delimiter in @var{STRING} whose
+position is greater than @var{POS}, or if there is no such character, it is
+assigned a value one greater than the length of @var{STRING}. This identifies
+a token with starting position one greater than the value of @var{POS} on
+invocation, and ending position one less than the value of @var{POS} on return.
+
+If @var{BACK} is present with the value true, @var{POS} is assigned the
+position of the rightmost token delimiter in @var{STRING} whose position is
+less than @var{POS}, or if there is no such character, it is assigned the value
+zero. This identifies a token with ending position one less than the value of
+@var{POS} on invocation, and starting position one greater than the value of
+@var{POS} on return.
+
+@item @emph{Class}:
+Subroutine
+
+@item @emph{Arguments}:
+@multitable @columnfractions .15 .70
+@item @var{STRING} @tab Shall be of type @code{CHARACTER}.
+@item @var{SET} @tab Shall be of type @code{CHARACTER}.
+@item @var{POS} @tab Shall be of type @code{INTEGER}.
+@item @var{BACK} @tab (Optional) Shall be of type @code{LOGICAL}.
+@end multitable
+
+@item @emph{Example}:
+@smallexample
+character(len=:), allocatable :: input
+character(len=2) :: set = ', '
+integer :: p
+input = "one,last example"
+p = 0
+do
+ if (p > len(input)) exit
+ istart = p + 1
+ call split(input, set, p)
+ iend = p - 1
+ print '(t7, a)', input(istart:iend)
+end do
+@end smallexample
+
+@item @emph{Standard}:
+Fortran 2023
+
+@item @emph{See also}:
+@ref{SCAN}
+@end table
+
+
+
@node SPREAD
@section @code{SPREAD} --- Add a dimension to an array
@fnindex SPREAD
c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
}
+void
+gfc_resolve_split (gfc_code *c)
+{
+ const char *name;
+ gfc_expr *string;
+
+ string = c->ext.actual->expr;
+ if (string->ts.type == BT_CHARACTER && string->ts.kind == 4)
+ name = "__split_char4";
+ else
+ name = "__split";
+ c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
+}
/* G77 compatibility function srand(). */
tree gfor_fndecl_string_verify;
tree gfor_fndecl_string_trim;
tree gfor_fndecl_string_minmax;
+tree gfor_fndecl_string_split;
tree gfor_fndecl_adjustl;
tree gfor_fndecl_adjustr;
tree gfor_fndecl_select_string;
tree gfor_fndecl_string_verify_char4;
tree gfor_fndecl_string_trim_char4;
tree gfor_fndecl_string_minmax_char4;
+tree gfor_fndecl_string_split_char4;
tree gfor_fndecl_adjustl_char4;
tree gfor_fndecl_adjustr_char4;
tree gfor_fndecl_select_string_char4;
build_pointer_type (pchar1_type_node), integer_type_node,
integer_type_node);
+ gfor_fndecl_string_split = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX ("string_split")), ". . R . R . . ",
+ gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar1_type_node,
+ gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node,
+ gfc_logical4_type_node);
+
gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec (
get_identifier (PREFIX("adjustl")), ". W . R ",
void_type_node, 3, pchar1_type_node, gfc_charlen_type_node,
build_pointer_type (pchar4_type_node), integer_type_node,
integer_type_node);
+ gfor_fndecl_string_split_char4 = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX ("string_split_char4")), ". . R . R . . ",
+ gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar4_type_node,
+ gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node,
+ gfc_logical4_type_node);
+
gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec (
get_identifier (PREFIX("adjustl_char4")), ". W . R ",
void_type_node, 3, pchar4_type_node, gfc_charlen_type_node,
return gfc_finish_block (&block);
}
+static tree
+conv_intrinsic_split (gfc_code *code)
+{
+ stmtblock_t block, post_block;
+ gfc_se se;
+ gfc_expr *string_expr, *set_expr, *pos_expr, *back_expr;
+ tree string, string_len;
+ tree set, set_len;
+ tree pos, pos_for_call;
+ tree back;
+ tree fndecl, call;
+
+ string_expr = code->ext.actual->expr;
+ set_expr = code->ext.actual->next->expr;
+ pos_expr = code->ext.actual->next->next->expr;
+ back_expr = code->ext.actual->next->next->next->expr;
+
+ gfc_start_block (&block);
+ gfc_init_block (&post_block);
+
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, string_expr);
+ gfc_conv_string_parameter (&se);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ string = se.expr;
+ string_len = se.string_length;
+
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, set_expr);
+ gfc_conv_string_parameter (&se);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ set = se.expr;
+ set_len = se.string_length;
+
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, pos_expr);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ pos = se.expr;
+ pos_for_call = fold_convert (gfc_charlen_type_node, pos);
+
+ if (back_expr)
+ {
+ gfc_init_se (&se, NULL);
+ gfc_conv_expr (&se, back_expr);
+ gfc_add_block_to_block (&block, &se.pre);
+ gfc_add_block_to_block (&post_block, &se.post);
+ back = se.expr;
+ }
+ else
+ back = logical_false_node;
+
+ if (string_expr->ts.kind == 1)
+ fndecl = gfor_fndecl_string_split;
+ else if (string_expr->ts.kind == 4)
+ fndecl = gfor_fndecl_string_split_char4;
+ else
+ gcc_unreachable ();
+
+ call = build_call_expr_loc (input_location, fndecl, 6, string_len, string,
+ set_len, set, pos_for_call, back);
+ gfc_add_modify (&block, pos, fold_convert (TREE_TYPE (pos), call));
+
+ gfc_add_block_to_block (&block, &post_block);
+ return gfc_finish_block (&block);
+}
/* Return a character string containing the tty name. */
res = conv_intrinsic_system_clock (code);
break;
+ case GFC_ISYM_SPLIT:
+ res = conv_intrinsic_split (code);
+ break;
+
default:
res = NULL_TREE;
break;
extern GTY(()) tree gfor_fndecl_string_verify;
extern GTY(()) tree gfor_fndecl_string_trim;
extern GTY(()) tree gfor_fndecl_string_minmax;
+extern GTY(()) tree gfor_fndecl_string_split;
extern GTY(()) tree gfor_fndecl_adjustl;
extern GTY(()) tree gfor_fndecl_adjustr;
extern GTY(()) tree gfor_fndecl_select_string;
extern GTY(()) tree gfor_fndecl_string_verify_char4;
extern GTY(()) tree gfor_fndecl_string_trim_char4;
extern GTY(()) tree gfor_fndecl_string_minmax_char4;
+extern GTY(()) tree gfor_fndecl_string_split_char4;
extern GTY(()) tree gfor_fndecl_adjustl_char4;
extern GTY(()) tree gfor_fndecl_adjustr_char4;
extern GTY(()) tree gfor_fndecl_select_string_char4;
--- /dev/null
+! { dg-do run }
+program b
+ character(len=:), allocatable :: input
+ character(len=2) :: set = ', '
+ integer :: p
+ input = " one,last example,"
+ p = 0
+
+ call split(input, set, p)
+ if (p /= 1) STOP 1
+ call split(input, set, p)
+ if (p /= 5) STOP 2
+ call split(input, set, p)
+ if (p /= 10) STOP 3
+ call split(input, set, p)
+ if (p /= 18) STOP 4
+ call split(input, set, p)
+ if (p /= 19) STOP 5
+
+ call split(input, set, p, .true.)
+ if (p /= 18) STOP 6
+ call split(input, set, p, .true.)
+ if (p /= 10) STOP 7
+ call split(input, set, p, .true.)
+ if (p /= 5) STOP 8
+ call split(input, set, p, .true.)
+ if (p /= 1) STOP 9
+end program b
--- /dev/null
+! { dg-do run }
+program b
+ integer, parameter :: ucs4 = selected_char_kind('ISO_10646')
+ character(kind=ucs4, len=:), allocatable :: input, set
+ integer :: p = 0
+
+ input = char(int(z'4f60'), ucs4) // char(int(z'597d'), ucs4) // char(int(z'4f60'), ucs4) // char(int(z'4e16'), ucs4)
+ set = char(int(z'597d'), ucs4) // char(int(z'4e16'), ucs4)
+
+ call split(input, set, p)
+ if (p /= 2) stop 1
+ call split(input, set, p)
+ if (p /= 4) stop 2
+ call split(input, set, p)
+ if (p /= 5) stop 3
+ call split(input, set, p, .true.)
+ if (p /= 4) stop 4
+ call split(input, set, p, .true.)
+ if (p /= 2) stop 5
+ call split(input, set, p, .true.)
+ if (p /= 0) stop 6
+end program b
--- /dev/null
+! { dg-do run }
+! { dg-shouldfail "Fortran runtime error" }
+
+program b
+ character(len=:), allocatable :: input
+ character(len=2) :: set = ', '
+ integer :: p
+ input = " one,last example,"
+ p = -1
+ call split(input, set, p)
+end program b
--- /dev/null
+! { dg-do run }
+! { dg-shouldfail "Fortran runtime error" }
+
+program b
+ character(len=:), allocatable :: input
+ character(len=2) :: set = ', '
+ integer :: p
+ input = " one,last example,"
+ p = 0
+ call split(input, set, p, .true.)
+end program b
_gfortran_mmaxloc1_16_m16;
_gfortran_smaxloc1_16_m16;
} GFORTRAN_15;
+
+GFORTRAN_16 {
+ global:
+ _gfortran_string_split;
+ _gfortran_string_split_char4;
+} GFORTRAN_15.2;
#define string_verify SUFFIX(string_verify)
#define string_trim SUFFIX(string_trim)
#define string_minmax SUFFIX(string_minmax)
+#define string_split SUFFIX(string_split)
#define zero_length_string SUFFIX(zero_length_string)
#define compare_string SUFFIX(compare_string)
extern void string_minmax (gfc_charlen_type *, CHARTYPE **, int, int, ...);
export_proto(string_minmax);
+extern gfc_charlen_type string_split (gfc_charlen_type, const CHARTYPE *,
+ gfc_charlen_type, const CHARTYPE *,
+ gfc_charlen_type, GFC_LOGICAL_4);
+export_proto (string_split);
/* Use for functions which can return a zero-length string. */
static CHARTYPE zero_length_string = 0;
*dest = tmp;
}
}
+
+gfc_charlen_type
+string_split (gfc_charlen_type stringlen, const CHARTYPE *string,
+ gfc_charlen_type setlen, const CHARTYPE *set,
+ gfc_charlen_type pos, GFC_LOGICAL_4 back)
+{
+ gfc_charlen_type i, j;
+
+ if (!back)
+ {
+ if (pos > stringlen)
+ runtime_error ("If BACK is present with the value false, the value of "
+ "POS shall be in the range [0, LEN (STRING)], "
+ "where POS = %ld and LEN (STRING) = %ld",
+ pos, stringlen);
+
+ for (i = pos + 1; i <= stringlen; i++)
+ {
+ for (j = 0; j < setlen; j++)
+ {
+ if (string[i - 1] == set[j])
+ return i;
+ }
+ }
+
+ return stringlen + 1;
+ }
+ else
+ {
+ if (pos < 1 || pos > (stringlen + 1))
+ runtime_error ("If BACK is present with the value true, the value of "
+ "POS shall be in the range [1, LEN (STRING) + 1], "
+ "where POS = %ld and LEN (STRING) = %ld",
+ pos, stringlen);
+
+ for (i = pos - 1; i != 0; i--)
+ {
+ for (j = 0; j < setlen; j++)
+ {
+ if (string[i - 1] == set[j])
+ return i;
+ }
+ }
+
+ return 0;
+ }
+}