From 34186331bdeedb5a8ecae0a9a95d87e8624ea726 Mon Sep 17 00:00:00 2001 From: Nicolas Koenig Date: Sun, 8 Nov 2020 17:07:48 +0100 Subject: [PATCH] Implement stat and errmsg. gcc/fortran/ChangeLog: * trans-decl.c (gfc_sym_mangled_function_id): Whitespace fix. (gfc_build_builtin_function_decls): Correct fn specs. * trans-intrinsic.c (trans_argument): Re-apply fix (?) (conv_cas_reduce): Likewise. (conv_co_collective): Likewise. libgfortran/ChangeLog: * Makefile.am: Add counter_barrier.c and counter_barrier.h * Makefile.in: Regenerate. * generated/nca_minmax_i1.c: Regenerated. * generated/nca_minmax_i16.c: Regenerated. * generated/nca_minmax_i2.c: Regenerated. * generated/nca_minmax_i4.c: Regenerated. * generated/nca_minmax_i8.c: Regenerated. * generated/nca_minmax_r10.c: Regenerated. * generated/nca_minmax_r16.c: Regenerated. * generated/nca_minmax_r4.c: Regenerated. * generated/nca_minmax_r8.c: Regenerated. * generated/nca_minmax_s1.c: Regenerated. * generated/nca_minmax_s4.c: Regenerated. * m4/nca-minmax-s.m4: Add stat and errmsg. * m4/nca_minmax.m4: Likewise. * nca/coarraynative.c (get_master): New function. (test_for_cas_errors): New function. (master_is_image_active): New function. (master_get_num_active_images): New function. (master_bind_active_image_barrier): New function. (error_on_missing_images): New function. (cas_master): New function. * nca/collective_subroutine.c (collsub_sync): Replace pthread_barrier_wait by counter_barrier. (collsub_reduce_array): Add error_on_missing_images. Adjust to number of images. (collsub_reduce_scalar): Likewise. (collsub_iface_init): Likewise. * nca/collective_subroutine.h: Replace pthread_barrier_t by counter_barrier. * nca/libcoarraynative.h: Include counter_barrier.h. Add handling for failed images, stat and errmsg. * nca/sync.c (sync_all_init): Replace pthread_barrir by counter_barrier. (sync_iface_init): Adjust handling to total_num_images. (sync_table): Likewise. (sync_all): LIkewise. * nca/sync.h: Include some theaders, adjust to counter_barrier. * nca/util.h: Add internal_proto to unpack_array_finish. * nca/wrapper.c (cas_collsub_reduce_array): Adjust to total_num_images, handle status and errmsg. (cas_collsub_reduce_scalar): Likewise. (cas_collsub_broadcast_array): Likewise. (cas_collsub_broadcast_scalar): Likewise. (cas_coarray_alloc): Likewise. (cas_coarray_free): Likewise. (cas_coarray_num_images): Likewise. (cas_coarray_sync_all): Likewise. (cas_sync_images): Likewise. * nca/counter_barrier.c: New file. * nca/counter_barrier.h: New file. --- gcc/fortran/trans-decl.c | 47 +++++--- gcc/fortran/trans-intrinsic.c | 58 +++++++--- libgfortran/Makefile.am | 4 +- libgfortran/Makefile.in | 17 ++- libgfortran/generated/nca_minmax_i1.c | 96 +++++++++------- libgfortran/generated/nca_minmax_i16.c | 96 +++++++++------- libgfortran/generated/nca_minmax_i2.c | 96 +++++++++------- libgfortran/generated/nca_minmax_i4.c | 96 +++++++++------- libgfortran/generated/nca_minmax_i8.c | 96 +++++++++------- libgfortran/generated/nca_minmax_r10.c | 96 +++++++++------- libgfortran/generated/nca_minmax_r16.c | 96 +++++++++------- libgfortran/generated/nca_minmax_r4.c | 96 +++++++++------- libgfortran/generated/nca_minmax_r8.c | 96 +++++++++------- libgfortran/generated/nca_minmax_s1.c | 60 ++++++---- libgfortran/generated/nca_minmax_s4.c | 60 ++++++---- libgfortran/m4/nca-minmax-s.m4 | 30 +++-- libgfortran/m4/nca_minmax.m4 | 32 +++--- libgfortran/nca/coarraynative.c | 134 +++++++++++++++++++--- libgfortran/nca/collective_subroutine.c | 44 ++++---- libgfortran/nca/collective_subroutine.h | 2 +- libgfortran/nca/counter_barrier.c | 141 ++++++++++++++++++++++++ libgfortran/nca/counter_barrier.h | 36 ++++++ libgfortran/nca/libcoarraynative.h | 57 ++++++++-- libgfortran/nca/sync.c | 24 ++-- libgfortran/nca/sync.h | 5 +- libgfortran/nca/util.h | 2 + libgfortran/nca/wrapper.c | 51 ++++----- 27 files changed, 1091 insertions(+), 577 deletions(-) create mode 100644 libgfortran/nca/counter_barrier.c create mode 100644 libgfortran/nca/counter_barrier.h diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index ff429d9bed62..4f5b8f0fd1b7 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -460,7 +460,7 @@ gfc_sym_mangled_function_id (gfc_symbol * sym) return get_identifier (sym->binding_label); if ((sym->module == NULL || sym->attr.proc == PROC_EXTERNAL - || (sym->module != NULL && (sym->attr.external + || (sym->module != NULL && (sym->attr.external || sym->attr.if_source == IFSRC_IFBODY))) && !sym->attr.module_procedure) { @@ -4141,39 +4141,52 @@ gfc_build_builtin_function_decls (void) get_identifier (PREFIX("cas_coarray_sync_all")), ". X ", void_type_node, 1, build_pointer_type (integer_type_node), NULL_TREE); gfor_fndecl_cas_sync_images = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("cas_sync_images")), ". R R X X X ", + get_identifier (PREFIX("cas_sync_images")), ". R R w w . ", void_type_node, 5, integer_type_node, pint_type, pint_type, - pchar_type_node, size_type_node, NULL_TREE); + pchar_type_node, size_type_node); gfor_fndecl_cas_lock = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("cas_lock")), ". w ", void_type_node, 1, - pvoid_type_node, NULL_TREE); + pvoid_type_node); gfor_fndecl_cas_unlock = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("cas_unlock")), ". w ", void_type_node, 1, - pvoid_type_node, NULL_TREE); + pvoid_type_node); gfor_fndecl_cas_reduce_scalar = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("cas_collsub_reduce_scalar")), ". w r W ", - void_type_node, 3, pvoid_type_node, + get_identifier (PREFIX("cas_collsub_reduce_scalar")), ". w . r r w w . ", + void_type_node, 7, pvoid_type_node, /* object. */ + size_type_node, /* elem_size. */ build_pointer_type (build_function_type_list (void_type_node, - pvoid_type_node, pvoid_type_node, NULL_TREE)), - pint_type, NULL_TREE); + pvoid_type_node, pvoid_type_node, NULL_TREE)), /* assign function. */ + pint_type, /* result_image. */ + pint_type, /* stat. */ + pchar_type_node, /* errmsg. */ + size_type_node /* errmsg_len. */); gfor_fndecl_cas_reduce_array = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("cas_collsub_reduce_array")), ". w r W R ", - void_type_node, 4, pvoid_type_node, + get_identifier (PREFIX("cas_collsub_reduce_array")), ". W r r w w . ", + void_type_node, 6, pvoid_type_node /* desc. */, build_pointer_type (build_function_type_list (void_type_node, - pvoid_type_node, pvoid_type_node, NULL_TREE)), - pint_type, integer_type_node, NULL_TREE); + pvoid_type_node, pvoid_type_node, NULL_TREE)) /* assign function. */, + pint_type, /* result_image. */ + pint_type, /* stat. */ + pchar_type_node, /* errmsg. */ + size_type_node /* errmsg_len. */ ); gfor_fndecl_cas_broadcast_scalar = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX ("cas_collsub_broadcast_scalar")), ". w . . ", - void_type_node, 3, pvoid_type_node, size_type_node, integer_type_node); + get_identifier (PREFIX ("cas_collsub_broadcast_scalar")), ". w . . w w . ", + void_type_node, 6, pvoid_type_node, size_type_node, integer_type_node, + pint_type, /* stat. */ + pchar_type_node, /* errmsg. */ + size_type_node /* errmsg_len. */ ); gfor_fndecl_cas_broadcast_array = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX ("cas_collsub_broadcast_array")), ". W . ", - void_type_node, 2, pvoid_type_node, integer_type_node); + get_identifier (PREFIX ("cas_collsub_broadcast_array")), ". W . w w . ", + void_type_node, 5, pvoid_type_node, integer_type_node, + pint_type, /* stat. */ + pchar_type_node, /* errmsg. */ + size_type_node /* errmsg_len. */ ); } diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index f9df1c9198b6..7824dcf55a08 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -11098,34 +11098,42 @@ gfc_walk_intrinsic_function (gfc_ss * ss, gfc_expr * expr, } } -/* Helper function - translate an argument and advance to the next. - Coarrays are irrelevant here, since we just translate normal arguments. */ +/* Helper function - translate an argument and advance to the next. + Coarrays are irrelevant here, since we just translate normal + arguments. */ static tree trans_argument (gfc_actual_arglist **curr_al, stmtblock_t *blk, stmtblock_t *postblk, gfc_se *argse, tree def) { - if (!(*curr_al)->expr) + gfc_expr *expr = (*curr_al)->expr; + + *curr_al = (*curr_al)->next; + + if (expr == NULL) return def; - if ((*curr_al)->expr->rank > 0) - gfc_conv_expr_descriptor (argse, (*curr_al)->expr); + + if (expr->rank > 0) + gfc_conv_expr_descriptor (argse, expr); else - gfc_conv_expr (argse, (*curr_al)->expr); + gfc_conv_expr (argse, expr); + gfc_add_block_to_block (blk, &argse->pre); gfc_add_block_to_block (postblk, &argse->post); - *curr_al = (*curr_al)->next; + return argse->expr; } -/* Convert CO_REDUCE for native coarrays. */ +/* Convert CO_REDUCE for shared coarrays. */ static tree conv_cas_reduce (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk) { gfc_actual_arglist *curr_al; - tree var, reduce_op, result_image, elem_size; + tree var, reduce_op, result_image, elem_size, stat, errmsg, errmsg_len; gfc_se argse; int is_array; + bool has_errmsg; curr_al = code->ext.actual; @@ -11144,14 +11152,34 @@ conv_cas_reduce (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk) argse.want_pointer = 1; result_image = trans_argument (&curr_al, blk, postblk, &argse, null_pointer_node); - + + gfc_init_se (&argse, NULL); + argse.want_pointer = 1; + stat = trans_argument (&curr_al, blk, postblk, &argse, null_pointer_node); + + has_errmsg = curr_al->expr != NULL; + gfc_init_se (&argse, NULL); + argse.want_pointer = 1; + errmsg = trans_argument (&curr_al, blk, postblk, &argse, null_pointer_node); + + if (has_errmsg) + { + errmsg_len = fold_convert (size_type_node, argse.string_length); + } + else + { + errmsg_len = build_zero_cst (size_type_node); + } + if (is_array) return build_call_expr_loc (input_location, gfor_fndecl_cas_reduce_array, - 3, var, reduce_op, result_image); + 6, var, reduce_op, result_image, stat, errmsg, + errmsg_len); elem_size = size_in_bytes(TREE_TYPE(TREE_TYPE(var))); - return build_call_expr_loc (input_location, gfor_fndecl_cas_reduce_scalar, 4, - var, elem_size, reduce_op, result_image); + return build_call_expr_loc (input_location, gfor_fndecl_cas_reduce_scalar, 7, + var, elem_size, reduce_op, result_image, stat, + errmsg, errmsg_len); } static tree @@ -11184,7 +11212,7 @@ conv_cas_broadcast (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk) static tree conv_co_collective (gfc_code *); -/* Convert collective subroutines for native coarrays. */ +/* Convert collective subroutines for shared coarrays. */ static tree conv_cas_collective (gfc_code *code) @@ -11336,7 +11364,7 @@ conv_co_collective (gfc_code *code) errmsg_len = build_zero_cst (size_type_node); } - /* For native coarrays, we only come here for CO_BROADCAST. */ + /* For shared coarrays, we only come here for CO_BROADCAST. */ gcc_assert (code->resolved_isym->id == GFC_ISYM_CO_BROADCAST || flag_coarray != GFC_FCOARRAY_SHARED); diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am index ef2b9f95488b..bb4f126862f2 100644 --- a/libgfortran/Makefile.am +++ b/libgfortran/Makefile.am @@ -82,12 +82,12 @@ mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR) libcaf_shared_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \ nca/hashmap.c \ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \ - nca/shared_memory.c \ + nca/shared_memory.c nca/counter_barrier.c \ $(i_nca_minmax_c) $(i_nca_minmax_s_c) libcaf_shared_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h nca/libcoarraynative.h nca/sync.h shared_memory.h \ nca/util.h nca/lock.h nca/collective_subroutine.h\ - nca/collective_inline.h + nca/collective_inline.h nca/counter_barrier.h libcaf_shared_la_LINK = $(LINK) $(libcaf_shared_la_LDFLAGS) endif diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in index 5d9d7e48b82e..42f4724cf11d 100644 --- a/libgfortran/Makefile.in +++ b/libgfortran/Makefile.in @@ -230,8 +230,9 @@ am__objects_2 = nca_minmax_s1.lo nca_minmax_s4.lo @LIBGFOR_NATIVE_COARRAY_TRUE@ hashmap.lo sync.lo util.lo \ @LIBGFOR_NATIVE_COARRAY_TRUE@ wrapper.lo \ @LIBGFOR_NATIVE_COARRAY_TRUE@ collective_subroutine.lo \ -@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo $(am__objects_1) \ -@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_2) +@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ counter_barrier.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_1) $(am__objects_2) libcaf_shared_la_OBJECTS = $(am_libcaf_shared_la_OBJECTS) @LIBGFOR_NATIVE_COARRAY_TRUE@am_libcaf_shared_la_rpath = -rpath \ @LIBGFOR_NATIVE_COARRAY_TRUE@ $(mylibdir) @@ -788,7 +789,7 @@ i_nca_minmax_s_c = \ @LIBGFOR_NATIVE_COARRAY_TRUE@libcaf_shared_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \ @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/hashmap.c \ @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \ -@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c nca/counter_barrier.c \ @LIBGFOR_NATIVE_COARRAY_TRUE@ $(i_nca_minmax_c) $(i_nca_minmax_s_c) @LIBGFOR_NATIVE_COARRAY_TRUE@libcaf_shared_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h @@ -1840,6 +1841,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_2_l.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_4_l.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_8_l.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/counter_barrier.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cpu_time.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c10.Plo@am__quote@ @@ -2901,6 +2903,13 @@ shared_memory.lo: nca/shared_memory.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c +counter_barrier.lo: nca/counter_barrier.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT counter_barrier.lo -MD -MP -MF $(DEPDIR)/counter_barrier.Tpo -c -o counter_barrier.lo `test -f 'nca/counter_barrier.c' || echo '$(srcdir)/'`nca/counter_barrier.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/counter_barrier.Tpo $(DEPDIR)/counter_barrier.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/counter_barrier.c' object='counter_barrier.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o counter_barrier.lo `test -f 'nca/counter_barrier.c' || echo '$(srcdir)/'`nca/counter_barrier.c + nca_minmax_i1.lo: $(srcdir)/generated/nca_minmax_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i1.Tpo -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i1.Tpo $(DEPDIR)/nca_minmax_i1.Plo @@ -7235,7 +7244,7 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \ @LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/libcoarraynative.h nca/sync.h shared_memory.h \ @LIBGFOR_NATIVE_COARRAY_TRUE@ nca/util.h nca/lock.h nca/collective_subroutine.h\ -@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h nca/counter_barrier.h # Turn on vectorization and loop unrolling for matmul. $(patsubst %.c,%.lo,$(notdir $(i_matmul_c))): AM_CFLAGS += -ffast-math -ftree-vectorize -funroll-loops --param max-unroll-times=4 diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c index 30a8587ea559..a34018165a9f 100644 --- a/libgfortran/generated/nca_minmax_i1.c +++ b/libgfortran/generated/nca_minmax_i1.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i1); void cas_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, GFC_INTEGER_1 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i1); void cas_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, GFC_INTEGER_1 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i1); void cas_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, GFC_INTEGER_1 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i1); void cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_1); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i1); void cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_1); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i1); void cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_1); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c index f2e458182a51..6642fa6de2f6 100644 --- a/libgfortran/generated/nca_minmax_i16.c +++ b/libgfortran/generated/nca_minmax_i16.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i16); void cas_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, GFC_INTEGER_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i16); void cas_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, GFC_INTEGER_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i16); void cas_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, GFC_INTEGER_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i16); void cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i16); void cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i16); void cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c index fd8d718fbe10..1a9d43afe32c 100644 --- a/libgfortran/generated/nca_minmax_i2.c +++ b/libgfortran/generated/nca_minmax_i2.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i2); void cas_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, GFC_INTEGER_2 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i2); void cas_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, GFC_INTEGER_2 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i2); void cas_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, GFC_INTEGER_2 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i2); void cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_2); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i2); void cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_2); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i2); void cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_2); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c index 04972e767f71..e6018531a1f1 100644 --- a/libgfortran/generated/nca_minmax_i4.c +++ b/libgfortran/generated/nca_minmax_i4.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i4); void cas_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, GFC_INTEGER_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i4); void cas_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, GFC_INTEGER_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i4); void cas_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, GFC_INTEGER_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i4); void cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i4); void cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i4); void cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c index b4b0864896f0..fb5a2ecc2f38 100644 --- a/libgfortran/generated/nca_minmax_i8.c +++ b/libgfortran/generated/nca_minmax_i8.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_i8); void cas_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, GFC_INTEGER_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_i8); void cas_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, GFC_INTEGER_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_i8); void cas_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, GFC_INTEGER_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_i8); void cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_i8); void cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_i8); void cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_INTEGER_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c index 67111579f0d4..f219dd3dc89e 100644 --- a/libgfortran/generated/nca_minmax_r10.c +++ b/libgfortran/generated/nca_minmax_r10.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r10); void cas_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, GFC_REAL_10 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r10); void cas_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, GFC_REAL_10 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r10); void cas_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, GFC_REAL_10 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r10); void cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_10); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r10); void cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_10); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r10); void cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_10); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c index 959aec7fde3c..a26eafe691f8 100644 --- a/libgfortran/generated/nca_minmax_r16.c +++ b/libgfortran/generated/nca_minmax_r16.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r16); void cas_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, GFC_REAL_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r16); void cas_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, GFC_REAL_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r16); void cas_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, GFC_REAL_16 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r16); void cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r16); void cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r16); void cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_16); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c index 02dd3b67e51d..3efa62598231 100644 --- a/libgfortran/generated/nca_minmax_r4.c +++ b/libgfortran/generated/nca_minmax_r4.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r4); void cas_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, GFC_REAL_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r4); void cas_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, GFC_REAL_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r4); void cas_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, GFC_REAL_4 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r4); void cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r4); void cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r4); void cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_4); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c index 6af88cb6cc2d..83518b6bdb55 100644 --- a/libgfortran/generated/nca_minmax_r8.c +++ b/libgfortran/generated/nca_minmax_r8.c @@ -37,9 +37,7 @@ export_proto(cas_collsub_max_scalar_r8); void cas_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -47,17 +45,21 @@ cas_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, GFC_REAL_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -66,7 +68,7 @@ cas_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -82,9 +84,7 @@ export_proto(cas_collsub_min_scalar_r8); void cas_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -92,17 +92,21 @@ cas_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, GFC_REAL_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -111,7 +115,7 @@ cas_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -127,9 +131,7 @@ export_proto(cas_collsub_sum_scalar_r8); void cas_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -137,17 +139,21 @@ cas_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, GFC_REAL_8 *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -155,7 +161,7 @@ cas_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -171,9 +177,7 @@ export_proto (cas_collsub_max_array_r8); void cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -188,6 +192,10 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -212,7 +220,7 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -261,10 +269,10 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -282,7 +290,7 @@ cas_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -331,9 +339,7 @@ export_proto (cas_collsub_min_array_r8); void cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -348,6 +354,10 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -372,7 +382,7 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -421,10 +431,10 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -442,7 +452,7 @@ cas_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -491,9 +501,7 @@ export_proto (cas_collsub_sum_array_r8); void cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -508,6 +516,10 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -532,7 +544,7 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, num_elems = ssize / sizeof (GFC_REAL_8); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -581,10 +593,10 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -601,7 +613,7 @@ cas_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c index 493de0bf463b..1b4826c99dc1 100644 --- a/libgfortran/generated/nca_minmax_s1.c +++ b/libgfortran/generated/nca_minmax_s1.c @@ -70,19 +70,23 @@ cas_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -94,7 +98,7 @@ cas_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -125,19 +129,23 @@ cas_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -149,7 +157,7 @@ cas_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -169,10 +177,8 @@ export_proto (cas_collsub_max_array_s1); void cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -188,6 +194,10 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); @@ -212,7 +222,7 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -264,10 +274,10 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -285,7 +295,7 @@ cas_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -336,10 +346,8 @@ export_proto (cas_collsub_min_array_s1); void cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -355,6 +363,10 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_1); @@ -379,7 +391,7 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -431,10 +443,10 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -452,7 +464,7 @@ cas_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c index 9f74da23a84b..336e16737511 100644 --- a/libgfortran/generated/nca_minmax_s4.c +++ b/libgfortran/generated/nca_minmax_s4.c @@ -70,19 +70,23 @@ cas_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -94,7 +98,7 @@ cas_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -125,19 +129,23 @@ cas_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -149,7 +157,7 @@ cas_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -169,10 +177,8 @@ export_proto (cas_collsub_max_array_s4); void cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -188,6 +194,10 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); @@ -212,7 +222,7 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -264,10 +274,10 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -285,7 +295,7 @@ cas_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -336,10 +346,8 @@ export_proto (cas_collsub_min_array_s4); void cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -355,6 +363,10 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof (GFC_UINTEGER_4); @@ -379,7 +391,7 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -431,10 +443,10 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -452,7 +464,7 @@ cas_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/m4/nca-minmax-s.m4 b/libgfortran/m4/nca-minmax-s.m4 index af5dd5d31364..9e9b0b982866 100644 --- a/libgfortran/m4/nca-minmax-s.m4 +++ b/libgfortran/m4/nca-minmax-s.m4 @@ -48,19 +48,23 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, collsub_iface *ci; index_type type_size; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof ('rtype_name`); - buffer = get_collsub_buf (ci, type_size * local->num_images); + buffer = get_collsub_buf (ci, type_size * local->total_num_images); this_image_buf = buffer + this_image.image_num * char_len; memcpy (this_image_buf, obj, type_size); collsub_sync (ci); for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset * char_len; @@ -72,7 +76,7 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, /* All images have to execute the same number of collsub_sync, otherwise some images will hang. Here, we execute the missing ones for images that are not needed anymore in the main loop. */ - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -94,10 +98,8 @@ export_proto (cas_collsub_'$1`_array_'rtype_code`); void cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type char_len, - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; /* Store byte-based strides here. */ @@ -113,6 +115,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image index_type type_size; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; type_size = char_len * sizeof ('rtype_name`); @@ -137,7 +143,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image } ssize = num_elems * type_size; - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * ssize; if (packed) @@ -189,10 +195,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image r___________ */ for (; ((this_image.image_num >> cbit) & 1) == 0 - && (local->num_images >> cbit) != 0; cbit++) + && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { char *other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -210,7 +216,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/m4/nca_minmax.m4 b/libgfortran/m4/nca_minmax.m4 index 9e107fc24b88..f69cf65490d9 100644 --- a/libgfortran/m4/nca_minmax.m4 +++ b/libgfortran/m4/nca_minmax.m4 @@ -35,9 +35,7 @@ export_proto(cas_collsub_'$1`_scalar_'rtype_code`); void cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { int cbit = 0; int imoffset; @@ -45,17 +43,21 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, 'rtype_name` *buffer, *this_image_buf; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; - buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->num_images); + buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->total_num_images); this_image_buf = buffer + this_image.image_num; *this_image_buf = *obj; collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { a = this_image_buf; b = this_image_buf + imoffset; @@ -63,7 +65,7 @@ cas_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) @@ -81,9 +83,7 @@ export_proto (cas_collsub_'$1`_array_'rtype_code`); void cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, - int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - index_type errmsg_len __attribute__ ((unused))) + int *stat, char *errmsg, index_type errmsg_len) { index_type count[GFC_MAX_DIMENSIONS]; index_type stride[GFC_MAX_DIMENSIONS]; @@ -98,6 +98,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image int imoffset; collsub_iface *ci; + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); + + error_on_missing_images(); + ci = &local->ci; dim = GFC_DESCRIPTOR_RANK (array); @@ -122,7 +126,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image num_elems = ssize / sizeof ('rtype_name`); - buffer = get_collsub_buf (ci, ssize * local->num_images); + buffer = get_collsub_buf (ci, ssize * local->total_num_images); this_shared_ptr = buffer + this_image.image_num * num_elems; if (packed) @@ -171,10 +175,10 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image a_______b___ r___________ */ - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) { 'rtype_name` * other_shared_ptr; /* Points to the shared memory allocated to another image. */ @@ -191,7 +195,7 @@ cas_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || (*result_image - 1) == this_image.image_num) diff --git a/libgfortran/nca/coarraynative.c b/libgfortran/nca/coarraynative.c index 251e7c9dd571..c177fe8cea70 100644 --- a/libgfortran/nca/coarraynative.c +++ b/libgfortran/nca/coarraynative.c @@ -33,12 +33,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include #include #include +#include #define GFORTRAN_ENV_NUM_IMAGES "GFORTRAN_NUM_IMAGES" nca_local_data *local = NULL; -image this_image; +image this_image = {-1, NULL}; + +/* Get image number from environment or sysconf. */ static int get_environ_image_num (void) @@ -53,6 +56,26 @@ get_environ_image_num (void) return nimages; } +/* Get a master. */ + +static master * +get_master (void) +{ + master *m; + m = SHMPTR_AS (master *, + shared_memory_get_mem_with_alignment + (&local->sm, + sizeof (master) + sizeof(image_status) * local->total_num_images, + __alignof__(master)), &local->sm); + m->has_failed_image = 0; + m->finished_images = 0; + waitable_counter_init (&m->num_active_images, local->total_num_images); + return m; +} + + +/* Ensure things are initialized. */ + void ensure_initialization(void) { @@ -63,14 +86,94 @@ ensure_initialization(void) // point? Maybe use mmap(MAP_ANON) // instead pagesize = sysconf (_SC_PAGE_SIZE); - local->num_images = get_environ_image_num (); + local->total_num_images = get_environ_image_num (); shared_memory_init (&local->sm); shared_memory_prepare (&local->sm); + if (this_image.m == NULL) /* A bit of a hack, but we + need the master early. */ + this_image.m = get_master(); alloc_iface_init (&local->ai, &local->sm); collsub_iface_init (&local->ci, &local->ai, &local->sm); sync_iface_init (&local->si, &local->ai, &local->sm); } + +/* Test for failed or stopped images. */ + +int +test_for_cas_errors (int *stat, char *errmsg, size_t errmsg_length) +{ + size_t errmsg_written_bytes; + if (!stat) + return 0; + + /* This rather strange ordering is mandated by the standard. */ + if (this_image.m->finished_images) + { + *stat = CAS_STAT_STOPPED_IMAGE; + if (errmsg) + { + errmsg_written_bytes + = snprintf(errmsg, errmsg_length, + "Stopped images present (currently %d)", + this_image.m->finished_images); + if (errmsg_written_bytes > errmsg_length - 1) + errmsg_written_bytes = errmsg_length - 1; + + memset(errmsg + errmsg_written_bytes, ' ', + errmsg_length - errmsg_written_bytes); + } + } + else if (this_image.m->has_failed_image) + { + *stat = CAS_STAT_FAILED_IMAGE; + if (errmsg) + { + errmsg_written_bytes + = snprintf(errmsg, errmsg_length, + "Failed images present (currently %d)", + this_image.m->has_failed_image); + if (errmsg_written_bytes > errmsg_length - 1) + errmsg_written_bytes = errmsg_length - 1; + + memset(errmsg + errmsg_written_bytes, ' ', + errmsg_length - errmsg_written_bytes); + } + } + else + { + *stat = 0; + return 0; + } + return 1; +} + +/* Check if an image is active. */ + +int +master_is_image_active (master *m, int image_num) +{ + return m->images[image_num].status == IMAGE_OK; +} + +/* Get number of active images. */ + +int +master_get_num_active_images (master *m) +{ + return waitable_counter_get_val(&m->num_active_images); +} + +/* Bind barrier to counter. */ + +void +master_bind_active_image_barrier(master *m, counter_barrier *b) +{ + bind_counter_barrier(b, &m->num_active_images); +} + +/* Main wrapper. */ + static void __attribute__((noreturn)) image_main_wrapper (void (*image_main) (void), image *this) { @@ -83,16 +186,10 @@ image_main_wrapper (void (*image_main) (void), image *this) exit (0); } -static master * -get_master (void) { - master *m; - m = SHMPTR_AS (master *, - shared_memory_get_mem_with_alignment - (&local->sm, - sizeof (master) + sizeof(image_status) * local->num_images, - __alignof__(master)), &local->sm); - m->has_failed_image = 0; - return m; +void +error_on_missing_images(void) { + if (master_get_num_active_images(this_image.m) != local->total_num_images) + exit(1); } /* This is called from main, with a pointer to the user's program as @@ -107,11 +204,11 @@ cas_master (void (*image_main) (void)) { int exit_code = 0; int chstatus; ensure_initialization(); - m = get_master(); + m = this_image.m; im.m = m; - for (im.image_num = 0; im.image_num < local->num_images; im.image_num++) + for (im.image_num = 0; im.image_num < local->total_num_images; im.image_num++) { if ((new = fork())) { @@ -126,25 +223,28 @@ cas_master (void (*image_main) (void)) { else image_main_wrapper(image_main, &im); } - for (i = 0; i < local->num_images; i++) + for (i = 0; i < local->total_num_images; i++) { new = wait (&chstatus); if (WIFEXITED (chstatus) && !WEXITSTATUS (chstatus)) { j = 0; - for (; j < local->num_images && m->images[j].pid != new; j++); + for (; j < local->total_num_images && m->images[j].pid != new; j++); m->images[j].status = IMAGE_SUCCESS; m->finished_images++; /* FIXME: Needs to be atomic, probably. */ } else if (!WIFEXITED (chstatus) || WEXITSTATUS (chstatus)) { j = 0; - for (; j < local->num_images && m->images[j].pid != new; j++); + for (; j < local->total_num_images && m->images[j].pid != new; j++); m->images[j].status = IMAGE_FAILED; m->has_failed_image++; /* FIXME: Needs to be atomic, probably. */ + for (; j < local->total_num_images; j++) + m->images[j].active_image_index--; dprintf (2, "ERROR: Image %d(%#x) failed\n", j, new); exit_code = 1; } + waitable_counter_add(&m->num_active_images, -1); } exit (exit_code); } diff --git a/libgfortran/nca/collective_subroutine.c b/libgfortran/nca/collective_subroutine.c index 14bd517d5767..f4d9b589e4dd 100644 --- a/libgfortran/nca/collective_subroutine.c +++ b/libgfortran/nca/collective_subroutine.c @@ -23,9 +23,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #include "libgfortran.h" +#include #include "libcoarraynative.h" #include "collective_subroutine.h" #include "allocator.h" +#include "counter_barrier.h" #include @@ -56,7 +58,7 @@ get_collsub_buf (collsub_iface *ci, size_t size) void collsub_sync (collsub_iface *ci) { - pthread_barrier_wait (&ci->s->barrier); + counter_barrier_wait (&ci->s->barrier); } @@ -83,6 +85,8 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image index_type this_image_size_bytes; char *this_image_buf; + error_on_missing_images(); + packed = pack_array_prepare (&pi, desc); if (pi.num_elem == 0) return; @@ -90,7 +94,7 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image elem_size = GFC_DESCRIPTOR_SIZE (desc); this_image_size_bytes = elem_size * pi.num_elem; - buffer = get_collsub_buf (ci, this_image_size_bytes * local->num_images); + buffer = get_collsub_buf (ci, this_image_size_bytes * local->total_num_images); this_image_buf = buffer + this_image_size_bytes * this_image.image_num; if (packed) @@ -99,10 +103,12 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image pack_array_finish (&pi, desc, this_image_buf); collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + + + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + if (this_image.image_num + imoffset < local->total_num_images) /* Reduce arrays elementwise. */ for (ssize_t i = 0; i < pi.num_elem; i++) assign_function (this_image_buf + elem_size * i, @@ -110,7 +116,7 @@ collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (local->total_num_images >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || *result_image == this_image.image_num) @@ -134,22 +140,26 @@ collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size, int imoffset; char *this_image_buf; - buffer = get_collsub_buf (ci, elem_size * local->num_images); + error_on_missing_images(); + + buffer = get_collsub_buf (ci, elem_size * master_get_num_active_images(this_image.m)); this_image_buf = buffer + elem_size * this_image.image_num; memcpy (this_image_buf, obj, elem_size); + collsub_sync (ci); - for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->total_num_images >> cbit) != 0; cbit++) { imoffset = 1 << cbit; - if (this_image.image_num + imoffset < local->num_images) + + if (this_image.image_num + imoffset < local->total_num_images) { /* Reduce arrays elementwise. */ - assign_function (this_image_buf, this_image_buf + elem_size*imoffset); - + assign_function (this_image_buf, this_image_buf + elem_size*imoffset); + } collsub_sync (ci); } - for ( ; (local->num_images >> cbit) != 0; cbit++) + for ( ; (master_get_num_active_images(this_image.m) >> cbit) != 0; cbit++) collsub_sync (ci); if (!result_image || *result_image == this_image.image_num) @@ -165,19 +175,15 @@ collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size, void collsub_iface_init (collsub_iface *ci, alloc_iface *ai, shared_memory *sm) { - pthread_barrierattr_t attr; ci->s = SHARED_MEMORY_RAW_ALLOC_PTR(sm, collsub_iface_shared); - ci->s->collsub_buf = shared_malloc(get_allocator(ai), sizeof(double)*local->num_images); - ci->s->curr_size = sizeof(double)*local->num_images; + ci->s->collsub_buf = shared_malloc(get_allocator(ai), + sizeof(double)*local->total_num_images); + ci->s->curr_size = sizeof(double)*local->total_num_images; ci->sm = sm; ci->a = get_allocator(ai); - pthread_barrierattr_init (&attr); - pthread_barrierattr_setpshared (&attr, PTHREAD_PROCESS_SHARED); - pthread_barrier_init (&ci->s->barrier, &attr, local->num_images); - pthread_barrierattr_destroy(&attr); - + master_bind_active_image_barrier (this_image.m, &ci->s->barrier); initialize_shared_mutex (&ci->s->mutex); } diff --git a/libgfortran/nca/collective_subroutine.h b/libgfortran/nca/collective_subroutine.h index 27931166ecf9..137220c3e9d2 100644 --- a/libgfortran/nca/collective_subroutine.h +++ b/libgfortran/nca/collective_subroutine.h @@ -8,7 +8,7 @@ typedef struct collsub_iface_shared { size_t curr_size; shared_mem_ptr collsub_buf; - pthread_barrier_t barrier; + counter_barrier barrier; pthread_mutex_t mutex; } collsub_iface_shared; diff --git a/libgfortran/nca/counter_barrier.c b/libgfortran/nca/counter_barrier.c new file mode 100644 index 000000000000..3f15ba460c57 --- /dev/null +++ b/libgfortran/nca/counter_barrier.c @@ -0,0 +1,141 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + + This file is part of the GNU Fortran Native Coarray Library (libnca). + + Libnca is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libnca is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "libgfortran.h" +#include "util.h" +#include "counter_barrier.h" + +#include + +/* Lock the associated counter of this barrier. */ + +static inline void +lock_counter_barrier (counter_barrier *b) +{ + pthread_mutex_lock (&b->count->m); +} + +/* Unlock the associated counter of this barrier. */ + +static inline void +unlock_counter_barrier (counter_barrier *b) +{ + pthread_mutex_unlock (&b->count->m); +} + +/* Wait on the barrier. */ + +void +counter_barrier_wait (counter_barrier *b) +{ + int initial_count; + int wait_group_beginning; + + assert (b->count); + + lock_counter_barrier (b); + + wait_group_beginning = b->curr_wait_group; + + if (--b->wait_count <= 0) + pthread_cond_broadcast (&b->cond); + else + { + while (b->wait_count > 0 && b->curr_wait_group == wait_group_beginning) + pthread_cond_wait(&b->cond, &b->count->m); + } + + if (b->wait_count <= 0) + { + b->curr_wait_group = !wait_group_beginning; + b->wait_count = b->count->val; + } + + unlock_counter_barrier (b); +} + +/* Adjust the counter of a barrier by val (which can be positive or + negative), signalling if necessary. */ + +static inline void +change_internal_barrier_count (counter_barrier *b, int val) +{ + b->wait_count += val; + if (b->wait_count <= 0) + pthread_cond_broadcast (&b->cond); +} + +/* Adjust all associated barriers of a counter. */ +int +waitable_counter_add (waitable_counter *c, int val) +{ + counter_barrier *curr; + int ret; + pthread_mutex_lock(&c->m); + ret = (c->val += val); + for (curr = c->b; curr; curr = curr->next) + change_internal_barrier_count(curr, val); + + pthread_mutex_unlock(&c->m); + return ret; +} + +/* Get the value of a counter. */ + +int +waitable_counter_get_val (waitable_counter *c) +{ + int ret; + pthread_mutex_lock(&c->m); + ret = c->val; + pthread_mutex_unlock(&c->m); + return ret; +} + +/* Initialize waitable counter. */ + +void +waitable_counter_init (waitable_counter *c, int val) +{ + initialize_shared_mutex (&c->m); + c->val = val; + c->b = NULL; +} + +/* Bind a barrier to a counter. */ + +void +bind_counter_barrier (counter_barrier *b, waitable_counter *c) +{ + initialize_shared_condition (&b->cond); + + pthread_mutex_lock (&c->m); + b->next = c->b; + b->curr_wait_group = 0; + b->count = c; + b->wait_count = c->val; + c->b = b; + pthread_mutex_unlock (&c->m); + +} diff --git a/libgfortran/nca/counter_barrier.h b/libgfortran/nca/counter_barrier.h new file mode 100644 index 000000000000..94eb727a74e0 --- /dev/null +++ b/libgfortran/nca/counter_barrier.h @@ -0,0 +1,36 @@ +#ifndef COUNTER_BARRIER_HDR +#define COUNTER_BARRIER_HDR + +#include + +struct waitable_counter; + +typedef struct counter_barrier +{ + pthread_cond_t cond; + struct waitable_counter *count; + volatile struct counter_barrier *next; + volatile int wait_count; + volatile int curr_wait_group; +} counter_barrier; + +typedef struct waitable_counter +{ + pthread_mutex_t m; + volatile counter_barrier *b; + volatile int val; +} waitable_counter; + +void waitable_counter_init (waitable_counter *c, int val); +internal_proto(waitable_counter_init); +int waitable_counter_add (waitable_counter *c, int val); +internal_proto(waitable_counter_add); +int waitable_counter_get_val (waitable_counter *c); +internal_proto(waitable_counter_get_val); + +void bind_counter_barrier (counter_barrier *b, waitable_counter *c); +internal_proto(bind_counter_barrier); +void counter_barrier_wait (counter_barrier *b); +internal_proto(counter_barrier_wait); + +#endif diff --git a/libgfortran/nca/libcoarraynative.h b/libgfortran/nca/libcoarraynative.h index 4bd7e3ca7401..e4549652d785 100644 --- a/libgfortran/nca/libcoarraynative.h +++ b/libgfortran/nca/libcoarraynative.h @@ -38,11 +38,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include "sync.h" #include "util.h" #include "collective_subroutine.h" - -typedef struct { - pthread_barrier_t barrier; - int maximg; -} ipcollsub; +#include "counter_barrier.h" + +/* Defnitions of the Fortran 2008 standard; need to be kept in sync + with ISO_FORTRAN_ENV, cf. gcc/fortran/libgfortran.h. */ +typedef enum +{ + CAS_STAT_UNLOCKED = 0, + CAS_STAT_LOCKED, + CAS_STAT_LOCKED_OTHER_IMAGE, + CAS_STAT_STOPPED_IMAGE = 6000, + CAS_STAT_FAILED_IMAGE = 6001 +} stat_constants; typedef enum { IMAGE_UNKNOWN = 0, @@ -52,14 +59,17 @@ typedef enum { } image_status; typedef struct { - image_status status; pid_t pid; + image_status status; + int active_image_index; } image_tracker; typedef struct { - int has_failed_image; - int finished_images; - image_tracker images[]; + volatile int has_failed_image; + volatile int finished_images; + waitable_counter num_active_images; + pthread_mutex_t image_tracker_lock; + volatile image_tracker images[]; } master; typedef struct { @@ -70,7 +80,7 @@ typedef struct { extern image this_image; typedef struct { - int num_images; + int total_num_images; shared_memory sm; alloc_iface ai; collsub_iface ci; @@ -82,6 +92,33 @@ internal_proto (local); void ensure_initialization(void); internal_proto(ensure_initialization); +int test_for_cas_errors(int *, char *, size_t); +internal_proto(test_for_cas_errors); + +int master_get_num_active_images(master *m); +internal_proto(master_get_num_active_images); + +void master_bind_active_image_barrier (master *m, counter_barrier *); +internal_proto(master_bind_active_image_barrier); + +int master_is_image_active(master *m, int image_num); +internal_proto(master_is_image_active); + +void error_on_missing_images(void); +internal_proto(error_on_missing_images); + +#define STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len) \ + do { \ + if (test_for_cas_errors(stat, errmsg, errmsg_len))\ + return;\ + } while(0) + +#define STAT_ERRMSG_ENTRY_CHECK_RET(stat, errmsg, errmsg_len, retval) \ + do { \ + if (test_for_cas_errors(stat, errmsg, errmsg_len))\ + return retval;\ + } while(0) + void cas_master(void (*)(void)); export_proto (cas_master); diff --git a/libgfortran/nca/sync.c b/libgfortran/nca/sync.c index 7cf5ee28d9b8..c7ae0679e5e8 100644 --- a/libgfortran/nca/sync.c +++ b/libgfortran/nca/sync.c @@ -29,13 +29,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include static void -sync_all_init (pthread_barrier_t *b) +sync_all_init (counter_barrier *b) { - pthread_barrierattr_t battr; - pthread_barrierattr_init (&battr); - pthread_barrierattr_setpshared (&battr, PTHREAD_PROCESS_SHARED); - pthread_barrier_init (b, &battr, local->num_images); - pthread_barrierattr_destroy (&battr); + master_bind_active_image_barrier(this_image.m, b); } static inline void @@ -76,17 +72,18 @@ sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm) si->a = get_allocator(ai); si->cis->table = - shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images); + shared_malloc(si->a, sizeof(int)*local->total_num_images * local->total_num_images); si->cis->triggers = - shared_malloc(si->a, sizeof(pthread_cond_t)*local->num_images); + shared_malloc(si->a, sizeof(pthread_cond_t)*local->total_num_images); si->table = SHMPTR_AS(int *, si->cis->table, si->sm); si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm); - for (int i = 0; i < local->num_images; i++) + for (int i = 0; i < local->total_num_images; i++) initialize_shared_condition (&si->triggers[i]); } +/* TODO: Maybe check whether synchronizing image is still alive. */ void sync_table (sync_iface *si, int *images, size_t size) { @@ -101,15 +98,15 @@ sync_table (sync_iface *si, int *images, size_t size) int *table = get_locked_table(si); for (i = 0; i < size; i++) { - table[images[i] - 1 + local->num_images*this_image.image_num]++; + table[images[i] - 1 + local->total_num_images*this_image.image_num]++; pthread_cond_signal (&si->triggers[images[i] - 1]); } for (;;) { done = 1; for (i = 0; i < size; i++) - done &= si->table[images[i] - 1 + this_image.image_num*local->num_images] - == si->table[this_image.image_num + (images[i] - 1)*local->num_images]; + done &= si->table[images[i] - 1 + this_image.image_num*local->total_num_images] + == si->table[this_image.image_num + (images[i] - 1)*local->total_num_images]; if (done) break; wait_table_cond (si, &si->triggers[this_image.image_num]); @@ -120,6 +117,5 @@ sync_table (sync_iface *si, int *images, size_t size) void sync_all (sync_iface *si) { - - pthread_barrier_wait (&si->cis->sync_all); + counter_barrier_wait(&si->cis->sync_all); } diff --git a/libgfortran/nca/sync.h b/libgfortran/nca/sync.h index 53aa3dcb15f4..59946434da58 100644 --- a/libgfortran/nca/sync.h +++ b/libgfortran/nca/sync.h @@ -25,10 +25,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #ifndef IPSYNC_HDR #define IPSYNC_HDR +#include "shared_memory.h" +#include "alloc.h" +#include "counter_barrier.h" #include typedef struct { - pthread_barrier_t sync_all; + counter_barrier sync_all; pthread_mutex_t table_lock; shared_mem_ptr table; shared_mem_ptr triggers; diff --git a/libgfortran/nca/util.h b/libgfortran/nca/util.h index 1d3351295200..fa9e158b38f9 100644 --- a/libgfortran/nca/util.h +++ b/libgfortran/nca/util.h @@ -83,4 +83,6 @@ internal_proto (pack_array_finish); void unpack_array_finish (pack_info * const restrict, const gfc_array_char * const, const char * restrict); +internal_proto (unpack_array_finish); + #endif diff --git a/libgfortran/nca/wrapper.c b/libgfortran/nca/wrapper.c index d3d50f5d2112..2e0b53bb27ca 100644 --- a/libgfortran/nca/wrapper.c +++ b/libgfortran/nca/wrapper.c @@ -69,19 +69,19 @@ void cas_unlock (void *); export_proto (cas_unlock); void cas_collsub_reduce_array (gfc_array_char *, void (*) (void *, void *), - int *); + int *, int*, char *, size_t); export_proto (cas_collsub_reduce_array); void cas_collsub_reduce_scalar (void *, index_type, void (*) (void *, void *), - int *); + int *, int*, char *, size_t); export_proto (cas_collsub_reduce_scalar); -void cas_collsub_broadcast_array (gfc_array_char * restrict, int/*, int *, char *, - size_t*/); +void cas_collsub_broadcast_array (gfc_array_char * restrict, int, int *, char *, + size_t); export_proto (cas_collsub_broadcast_array); -void cas_collsub_broadcast_scalar (void * restrict, size_t, int/*, int *, char *, - size_t*/); +void cas_collsub_broadcast_scalar (void * restrict, size_t, int, int *, char *, + size_t); export_proto(cas_collsub_broadcast_scalar); void @@ -115,7 +115,7 @@ cas_coarray_alloc (gfc_array_void *desc, int elem_size, int corank, num_coarray_elems *= GFC_DESCRIPTOR_EXTENT(desc, i); } - extent_last_codimen = div_ru (local->num_images, num_coarray_elems); + extent_last_codimen = div_ru (local->total_num_images, num_coarray_elems); last_lbound = GFC_DIMENSION_LBOUND(desc->dim[last_rank_index]); GFC_DIMENSION_SET(desc->dim[last_rank_index], last_lbound, @@ -143,7 +143,7 @@ cas_coarray_alloc (gfc_array_void *desc, int elem_size, int corank, __ATOMIC_SEQ_CST)); if (!addr->initialized++) { - for (i = 0; i < local->num_images; i++) + for (i = 0; i < local->total_num_images; i++) initialize_shared_mutex (&addr->arr[i]); } __atomic_store_n (&addr->owner, 0, __ATOMIC_SEQ_CST); @@ -165,6 +165,7 @@ cas_coarray_free (gfc_array_void *desc, int alloc_type) lock_array *la; int expected = 0; la = desc->base_addr - offsetof (lock_array, arr); + /* TODO: Fix this, replace with some kind of atomic initilization. */ while (!__atomic_compare_exchange_n (&la->owner, &expected, this_image.image_num+1, false, __ATOMIC_SEQ_CST, @@ -173,7 +174,7 @@ cas_coarray_free (gfc_array_void *desc, int alloc_type) { /* Coarray locks can be removed and just normal pthread_mutex can be used. */ - for (i = 0; i < local->num_images; i++) + for (i = 0; i < local->total_num_images; i++) pthread_mutex_destroy (&la->arr[i]); } __atomic_store_n (&la->owner, 0, __ATOMIC_SEQ_CST); @@ -194,21 +195,20 @@ cas_coarray_this_image (int distance __attribute__((unused))) int cas_coarray_num_images (int distance __attribute__((unused))) { - return local->num_images; + return local->total_num_images; } void -cas_coarray_sync_all (int *stat __attribute__((unused))) +cas_coarray_sync_all (int *stat) { + STAT_ERRMSG_ENTRY_CHECK(stat, NULL, 0); sync_all (&local->si); } void -cas_sync_images (size_t s, int *images, - int *stat __attribute__((unused)), - char *error __attribute__((unused)), - size_t err_size __attribute__((unused))) +cas_sync_images (size_t s, int *images, int *stat, char *error, size_t err_size) { + STAT_ERRMSG_ENTRY_CHECK(stat, error, err_size); sync_table (&local->si, images, s); } @@ -226,33 +226,34 @@ cas_unlock (void *lock) void cas_collsub_reduce_array (gfc_array_char *desc, void (*assign_function) (void *, void *), - int *result_image) + int *result_image, int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_reduce_array (&local->ci, desc, result_image, assign_function); } void cas_collsub_reduce_scalar (void *obj, index_type elem_size, void (*assign_function) (void *, void *), - int *result_image) + int *result_image, + int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_reduce_scalar (&local->ci, obj, elem_size, result_image, assign_function); } void -cas_collsub_broadcast_array (gfc_array_char * restrict a, int source_image - /* , int *stat __attribute__ ((unused)), - char *errmsg __attribute__ ((unused)), - size_t errmsg_len __attribute__ ((unused))*/) +cas_collsub_broadcast_array (gfc_array_char * restrict a, int source_image, + int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_broadcast_array (&local->ci, a, source_image - 1); } void -cas_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image/*, - int *stat __attribute__((unused)), - char *errmsg __attribute__ ((unused)), - size_t errmsg_len __attribute__ ((unused))*/) +cas_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image, + int *stat, char *errmsg, size_t errmsg_len) { + STAT_ERRMSG_ENTRY_CHECK(stat, errmsg, errmsg_len); collsub_broadcast_scalar (&local->ci, obj, size, source_image - 1); } -- 2.47.2