From 48ae6c138ca30c4c5e876a0be47c9a0b5c8bf5c2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 Apr 2005 16:37:47 -0700 Subject: [PATCH] re PR middle-end/14311 (builtins for atomic operations needed) PR middle-end/14311 * builtin-types.def (BT_BOOL, BT_VOLATILE_PTR, BT_I1, BT_I2, BT_I4, BT_I8, BT_FN_VOID_VPTR, BT_FN_I1_VPTR_I1, BT_FN_I2_VPTR_I2, BT_FN_I4_VPTR_I4, BT_FN_I8_VPTR_I8, BT_FN_BOOL_VPTR_I1_I1, BT_FN_BOOL_VPTR_I2_I2, BT_FN_BOOL_VPTR_I4_I4, BT_FN_BOOL_VPTR_I8_I8, BT_FN_I1_VPTR_I1_I1, BT_FN_I2_VPTR_I2_I2, BT_FN_I4_VPTR_I4_I4, BT_FN_I8_VPTR_I8_I8): New. * builtins.def (DEF_SYNC_BUILTIN): New. (BUILT_IN_FETCH_AND_ADD_N, BUILT_IN_FETCH_AND_ADD_1, BUILT_IN_FETCH_AND_ADD_2, BUILT_IN_FETCH_AND_ADD_4, BUILT_IN_FETCH_AND_ADD_8, BUILT_IN_FETCH_AND_SUB_N, BUILT_IN_FETCH_AND_SUB_1, BUILT_IN_FETCH_AND_SUB_2, BUILT_IN_FETCH_AND_SUB_4, BUILT_IN_FETCH_AND_SUB_8, BUILT_IN_FETCH_AND_OR_N, BUILT_IN_FETCH_AND_OR_1, BUILT_IN_FETCH_AND_OR_2, BUILT_IN_FETCH_AND_OR_4, BUILT_IN_FETCH_AND_OR_8, BUILT_IN_FETCH_AND_AND_N, BUILT_IN_FETCH_AND_AND_1, BUILT_IN_FETCH_AND_AND_2, BUILT_IN_FETCH_AND_AND_4, BUILT_IN_FETCH_AND_AND_8, BUILT_IN_FETCH_AND_XOR_N, BUILT_IN_FETCH_AND_XOR_1, BUILT_IN_FETCH_AND_XOR_2, BUILT_IN_FETCH_AND_XOR_4, BUILT_IN_FETCH_AND_XOR_8, BUILT_IN_FETCH_AND_NAND_N, BUILT_IN_FETCH_AND_NAND_1, BUILT_IN_FETCH_AND_NAND_2, BUILT_IN_FETCH_AND_NAND_4, BUILT_IN_FETCH_AND_NAND_8, BUILT_IN_ADD_AND_FETCH_N, BUILT_IN_ADD_AND_FETCH_1, BUILT_IN_ADD_AND_FETCH_2, BUILT_IN_ADD_AND_FETCH_4, BUILT_IN_ADD_AND_FETCH_8, BUILT_IN_SUB_AND_FETCH_N, BUILT_IN_SUB_AND_FETCH_1, BUILT_IN_SUB_AND_FETCH_2, BUILT_IN_SUB_AND_FETCH_4, BUILT_IN_SUB_AND_FETCH_8, BUILT_IN_OR_AND_FETCH_N, BUILT_IN_OR_AND_FETCH_1, BUILT_IN_OR_AND_FETCH_2, BUILT_IN_OR_AND_FETCH_4, BUILT_IN_OR_AND_FETCH_8, BUILT_IN_AND_AND_FETCH_N, BUILT_IN_AND_AND_FETCH_1, BUILT_IN_AND_AND_FETCH_2, BUILT_IN_AND_AND_FETCH_4, BUILT_IN_AND_AND_FETCH_8, BUILT_IN_XOR_AND_FETCH_N, BUILT_IN_XOR_AND_FETCH_1, BUILT_IN_XOR_AND_FETCH_2, BUILT_IN_XOR_AND_FETCH_4, BUILT_IN_XOR_AND_FETCH_8, BUILT_IN_NAND_AND_FETCH_N, BUILT_IN_NAND_AND_FETCH_1, BUILT_IN_NAND_AND_FETCH_2, BUILT_IN_NAND_AND_FETCH_4, BUILT_IN_NAND_AND_FETCH_8, BUILT_IN_BOOL_COMPARE_AND_SWAP_N, BUILT_IN_BOOL_COMPARE_AND_SWAP_1, BUILT_IN_BOOL_COMPARE_AND_SWAP_2, BUILT_IN_BOOL_COMPARE_AND_SWAP_4, BUILT_IN_BOOL_COMPARE_AND_SWAP_8, BUILT_IN_VAL_COMPARE_AND_SWAP_N, BUILT_IN_VAL_COMPARE_AND_SWAP_1, BUILT_IN_VAL_COMPARE_AND_SWAP_2, BUILT_IN_VAL_COMPARE_AND_SWAP_4, BUILT_IN_VAL_COMPARE_AND_SWAP_8, BUILT_IN_LOCK_TEST_AND_SET_N, BUILT_IN_LOCK_TEST_AND_SET_1, BUILT_IN_LOCK_TEST_AND_SET_2, BUILT_IN_LOCK_TEST_AND_SET_4, BUILT_IN_LOCK_TEST_AND_SET_8, BUILT_IN_LOCK_RELEASE_N, BUILT_IN_LOCK_RELEASE_1, BUILT_IN_LOCK_RELEASE_2, BUILT_IN_LOCK_RELEASE_4, BUILT_IN_LOCK_RELEASE_8, BUILT_IN_SYNCHRONIZE): New. * builtins.c (called_as_built_in): Rewrite from CALLED_AS_BUILT_IN as a function. Accept __sync_ as a prefix as well. (expand_builtin_sync_operation, expand_builtin_compare_and_swap, expand_builtin_lock_test_and_set, expand_builtin_synchronize, expand_builtin_lock_release): New. (expand_builtin): Call them. * c-common.c (DEF_BUILTIN): Don't require __builtin_ prefix if neither BOTH_P nor FALLBACK_P is defined. (builtin_type_for_size): New. (sync_resolve_size, sync_resolve_params, sync_resolve_return): New. (resolve_overloaded_builtin): New. * c-common.h (resolve_overloaded_builtin): Declare. (builtin_type_for_size): Declare. * c-typeck.c (build_function_call): Invoke resolve_overloaded_builtin. 
* expr.c (sync_add_optab, sync_sub_optab, sync_ior_optab, sync_and_optab, sync_xor_optab, sync_nand_optab, sync_old_add_optab, sync_old_sub_optab, sync_old_ior_optab, sync_old_and_optab, sync_old_xor_optab, sync_old_nand_optab, sync_new_add_optab, sync_new_sub_optab, sync_new_ior_optab, sync_new_and_optab, sync_new_xor_optab, sync_new_nand_optab, sync_compare_and_swap, sync_compare_and_swap_cc, sync_lock_test_and_set, sync_lock_release): New. * optabs.h: Declare them. * expr.h (expand_val_compare_and_swap, expand_bool_compare_and_swap, expand_sync_operation, expand_sync_fetch_operation, expand_sync_lock_test_and_set): Declare. * genopinit.c (optabs): Add sync optabs. * optabs.c (init_optabs): Initialize sync optabs. (expand_val_compare_and_swap_1, expand_val_compare_and_swap, expand_bool_compare_and_swap, expand_compare_and_swap_loop, expand_sync_operation, expand_sync_fetch_operation, expand_sync_lock_test_and_set): New. * doc/extend.texi (Atomic Builtins): New section. * doc/md.texi (Standard Names): Add sync patterns. From-SVN: r98154 --- gcc/ChangeLog | 140 ++++- gcc/builtin-types.def | 27 + gcc/builtins.c | 338 +++++++++++- gcc/builtins.def | 199 +++++++ gcc/c-common.c | 171 +++++- gcc/c-common.h | 4 + gcc/c-typeck.c | 7 + gcc/doc/extend.texi | 128 +++++ gcc/doc/md.texi | 134 +++++ gcc/expr.c | 24 + gcc/expr.h | 5 + gcc/genopinit.c | 25 +- gcc/optabs.c | 536 +++++++++++++++++++ gcc/optabs.h | 37 ++ gcc/testsuite/gcc.c-torture/compile/sync-1.c | 276 ++++++++++ gcc/testsuite/gcc.dg/sync-1.c | 40 ++ 16 files changed, 2057 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/compile/sync-1.c create mode 100644 gcc/testsuite/gcc.dg/sync-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 53aa00c82b36..88c3a0374972 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,89 @@ +2005-04-14 Richard Henderson + + PR middle-end/14311 + * builtin-types.def (BT_BOOL, BT_VOLATILE_PTR, BT_I1, BT_I2, + BT_I4, BT_I8, BT_FN_VOID_VPTR, BT_FN_I1_VPTR_I1, BT_FN_I2_VPTR_I2, + BT_FN_I4_VPTR_I4, BT_FN_I8_VPTR_I8, BT_FN_BOOL_VPTR_I1_I1, + BT_FN_BOOL_VPTR_I2_I2, BT_FN_BOOL_VPTR_I4_I4, BT_FN_BOOL_VPTR_I8_I8, + BT_FN_I1_VPTR_I1_I1, BT_FN_I2_VPTR_I2_I2, BT_FN_I4_VPTR_I4_I4, + BT_FN_I8_VPTR_I8_I8): New. + * builtins.def (DEF_SYNC_BUILTIN): New. 
+ (BUILT_IN_FETCH_AND_ADD_N, BUILT_IN_FETCH_AND_ADD_1, + BUILT_IN_FETCH_AND_ADD_2, BUILT_IN_FETCH_AND_ADD_4, + BUILT_IN_FETCH_AND_ADD_8, BUILT_IN_FETCH_AND_SUB_N, + BUILT_IN_FETCH_AND_SUB_1, BUILT_IN_FETCH_AND_SUB_2, + BUILT_IN_FETCH_AND_SUB_4, BUILT_IN_FETCH_AND_SUB_8, + BUILT_IN_FETCH_AND_OR_N, BUILT_IN_FETCH_AND_OR_1, + BUILT_IN_FETCH_AND_OR_2, BUILT_IN_FETCH_AND_OR_4, + BUILT_IN_FETCH_AND_OR_8, BUILT_IN_FETCH_AND_AND_N, + BUILT_IN_FETCH_AND_AND_1, BUILT_IN_FETCH_AND_AND_2, + BUILT_IN_FETCH_AND_AND_4, BUILT_IN_FETCH_AND_AND_8, + BUILT_IN_FETCH_AND_XOR_N, BUILT_IN_FETCH_AND_XOR_1, + BUILT_IN_FETCH_AND_XOR_2, BUILT_IN_FETCH_AND_XOR_4, + BUILT_IN_FETCH_AND_XOR_8, BUILT_IN_FETCH_AND_NAND_N, + BUILT_IN_FETCH_AND_NAND_1, BUILT_IN_FETCH_AND_NAND_2, + BUILT_IN_FETCH_AND_NAND_4, BUILT_IN_FETCH_AND_NAND_8, + BUILT_IN_ADD_AND_FETCH_N, BUILT_IN_ADD_AND_FETCH_1, + BUILT_IN_ADD_AND_FETCH_2, BUILT_IN_ADD_AND_FETCH_4, + BUILT_IN_ADD_AND_FETCH_8, BUILT_IN_SUB_AND_FETCH_N, + BUILT_IN_SUB_AND_FETCH_1, BUILT_IN_SUB_AND_FETCH_2, + BUILT_IN_SUB_AND_FETCH_4, BUILT_IN_SUB_AND_FETCH_8, + BUILT_IN_OR_AND_FETCH_N, BUILT_IN_OR_AND_FETCH_1, + BUILT_IN_OR_AND_FETCH_2, BUILT_IN_OR_AND_FETCH_4, + BUILT_IN_OR_AND_FETCH_8, BUILT_IN_AND_AND_FETCH_N, + BUILT_IN_AND_AND_FETCH_1, BUILT_IN_AND_AND_FETCH_2, + BUILT_IN_AND_AND_FETCH_4, BUILT_IN_AND_AND_FETCH_8, + BUILT_IN_XOR_AND_FETCH_N, BUILT_IN_XOR_AND_FETCH_1, + BUILT_IN_XOR_AND_FETCH_2, BUILT_IN_XOR_AND_FETCH_4, + BUILT_IN_XOR_AND_FETCH_8, BUILT_IN_NAND_AND_FETCH_N, + BUILT_IN_NAND_AND_FETCH_1, BUILT_IN_NAND_AND_FETCH_2, + BUILT_IN_NAND_AND_FETCH_4, BUILT_IN_NAND_AND_FETCH_8, + BUILT_IN_BOOL_COMPARE_AND_SWAP_N, BUILT_IN_BOOL_COMPARE_AND_SWAP_1, + BUILT_IN_BOOL_COMPARE_AND_SWAP_2, BUILT_IN_BOOL_COMPARE_AND_SWAP_4, + BUILT_IN_BOOL_COMPARE_AND_SWAP_8, BUILT_IN_VAL_COMPARE_AND_SWAP_N, + BUILT_IN_VAL_COMPARE_AND_SWAP_1, BUILT_IN_VAL_COMPARE_AND_SWAP_2, + BUILT_IN_VAL_COMPARE_AND_SWAP_4, BUILT_IN_VAL_COMPARE_AND_SWAP_8, + BUILT_IN_LOCK_TEST_AND_SET_N, BUILT_IN_LOCK_TEST_AND_SET_1, + BUILT_IN_LOCK_TEST_AND_SET_2, BUILT_IN_LOCK_TEST_AND_SET_4, + BUILT_IN_LOCK_TEST_AND_SET_8, BUILT_IN_LOCK_RELEASE_N, + BUILT_IN_LOCK_RELEASE_1, BUILT_IN_LOCK_RELEASE_2, + BUILT_IN_LOCK_RELEASE_4, BUILT_IN_LOCK_RELEASE_8, + BUILT_IN_SYNCHRONIZE): New. + * builtins.c (called_as_built_in): Rewrite from CALLED_AS_BUILT_IN + as a function. Accept __sync_ as a prefix as well. + (expand_builtin_sync_operation, expand_builtin_compare_and_swap, + expand_builtin_lock_test_and_set, expand_builtin_synchronize, + expand_builtin_lock_release): New. + (expand_builtin): Call them. + * c-common.c (DEF_BUILTIN): Don't require __builtin_ prefix if + neither BOTH_P nor FALLBACK_P is defined. + (builtin_type_for_size): New. + (sync_resolve_size, sync_resolve_params, sync_resolve_return): New. + (resolve_overloaded_builtin): New. + * c-common.h (resolve_overloaded_builtin): Declare. + (builtin_type_for_size): Declare. + * c-typeck.c (build_function_call): Invoke resolve_overloaded_builtin. + * expr.c (sync_add_optab, sync_sub_optab, sync_ior_optab, + sync_and_optab, sync_xor_optab, sync_nand_optab, sync_old_add_optab, + sync_old_sub_optab, sync_old_ior_optab, sync_old_and_optab, + sync_old_xor_optab, sync_old_nand_optab, sync_new_add_optab, + sync_new_sub_optab, sync_new_ior_optab, sync_new_and_optab, + sync_new_xor_optab, sync_new_nand_optab, sync_compare_and_swap, + sync_compare_and_swap_cc, sync_lock_test_and_set, + sync_lock_release): New. + * optabs.h: Declare them. 
+ * expr.h (expand_val_compare_and_swap, expand_bool_compare_and_swap, + expand_sync_operation, expand_sync_fetch_operation, + expand_sync_lock_test_and_set): Declare. + * genopinit.c (optabs): Add sync optabs. + * optabs.c (init_optabs): Initialize sync optabs. + (expand_val_compare_and_swap_1, expand_val_compare_and_swap, + expand_bool_compare_and_swap, expand_compare_and_swap_loop, + expand_sync_operation, expand_sync_fetch_operation, + expand_sync_lock_test_and_set): New. + * doc/extend.texi (Atomic Builtins): New section. + * doc/md.texi (Standard Names): Add sync patterns. + 2005-04-14 Alexandre Oliva * tree-eh.c (lower_try_finally_copy): Generate new code in @@ -91,13 +177,13 @@ 2005-04-13 Dale Johannesen - * objc/Make-lang.in (objc-lang.o): Depend on tree-gimple.h. - (objc-act.o): Ditto. - * objc/objc-act.c (objc_gimplify_expr): New. - (objc_get_callee_fndecl): New. - * objc/objc-act.h: Include tree-gimple.h. Declare new functions. - * objc/objc-lang.c (LANG_HOOKS_GIMPLIFY_EXPR): Define. - (LANG_HOOKS_GET_CALLEE_FNDECL): Define. + * objc/Make-lang.in (objc-lang.o): Depend on tree-gimple.h. + (objc-act.o): Ditto. + * objc/objc-act.c (objc_gimplify_expr): New. + (objc_get_callee_fndecl): New. + * objc/objc-act.h: Include tree-gimple.h. Declare new functions. + * objc/objc-lang.c (LANG_HOOKS_GIMPLIFY_EXPR): Define. + (LANG_HOOKS_GET_CALLEE_FNDECL): Define. 2005-04-13 Devang Patel @@ -489,29 +575,29 @@ 2005-04-11 Devang Patel - * tree-data-ref.c (build_classic_dist_vector, - compute_subscript_distance): Make externally visible. - * tree-data-ref.h (build_classic_dist_vector, - compute_subscript_distance): Same. - * tree-vect-analyze.c (vect_analyze_data_ref_dependence): - Check distance vector against vectorization factor. - (vect_analyze_loop): Determine vectorizaion factor before - analyzing data dependences. - * tree-vectorizer.c (loops_num): Make it externally visible and - rename ... - * tree-vectorizer.c (vect_loops_num): ... new name. - * tree-vectorizer.h (vect_loops_num): New. + * tree-data-ref.c (build_classic_dist_vector, + compute_subscript_distance): Make externally visible. + * tree-data-ref.h (build_classic_dist_vector, + compute_subscript_distance): Same. + * tree-vect-analyze.c (vect_analyze_data_ref_dependence): + Check distance vector against vectorization factor. + (vect_analyze_loop): Determine vectorizaion factor before + analyzing data dependences. + * tree-vectorizer.c (loops_num): Make it externally visible and + rename ... + * tree-vectorizer.c (vect_loops_num): ... new name. + * tree-vectorizer.h (vect_loops_num): New. 2005-04-11 Devang Patel - * tree-vect-analyze.c (vect_analyze_operations): Check - vectorizable codition. - * tree-vect-transform.c (vect_is_simple_cond): New function. - (vectorizable_condition): New function. - (vect_transform_stmt): Handle condition_vec_info_type. - * tree-vectorizer.h (enum stmt_vec_info_type): Add - condition_vec_info_type. - (vectorizable_condition): New. + * tree-vect-analyze.c (vect_analyze_operations): Check - vectorizable codition. + * tree-vect-transform.c (vect_is_simple_cond): New function. + (vectorizable_condition): New function. + (vect_transform_stmt): Handle condition_vec_info_type. + * tree-vectorizer.h (enum stmt_vec_info_type): Add + condition_vec_info_type. + (vectorizable_condition): New. 
2005-04-11 Geoffrey Keating diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index d9b87df309fc..4b5f35351e49 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -60,6 +60,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA the type pointed to. */ DEF_PRIMITIVE_TYPE (BT_VOID, void_type_node) +DEF_PRIMITIVE_TYPE (BT_BOOL, boolean_type_node) DEF_PRIMITIVE_TYPE (BT_INT, integer_type_node) DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node) DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node) @@ -79,6 +80,10 @@ DEF_PRIMITIVE_TYPE (BT_COMPLEX_LONGDOUBLE, complex_long_double_type_node) DEF_PRIMITIVE_TYPE (BT_PTR, ptr_type_node) DEF_PRIMITIVE_TYPE (BT_FILEPTR, fileptr_type_node) DEF_PRIMITIVE_TYPE (BT_CONST_PTR, const_ptr_type_node) +DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR, + build_pointer_type + (build_qualified_type (void_type_node, + TYPE_QUAL_VOLATILE))) DEF_PRIMITIVE_TYPE (BT_PTRMODE, (*lang_hooks.types.type_for_mode)(ptr_mode, 0)) DEF_PRIMITIVE_TYPE (BT_INT_PTR, integer_ptr_type_node) DEF_PRIMITIVE_TYPE (BT_FLOAT_PTR, float_ptr_type_node) @@ -94,6 +99,11 @@ DEF_PRIMITIVE_TYPE (BT_CONST_STRING, const_string_type_node) DEF_PRIMITIVE_TYPE (BT_VALIST_REF, va_list_ref_type_node) DEF_PRIMITIVE_TYPE (BT_VALIST_ARG, va_list_arg_type_node) +DEF_PRIMITIVE_TYPE (BT_I1, builtin_type_for_size (BITS_PER_UNIT*1, 1)) +DEF_PRIMITIVE_TYPE (BT_I2, builtin_type_for_size (BITS_PER_UNIT*2, 1)) +DEF_PRIMITIVE_TYPE (BT_I4, builtin_type_for_size (BITS_PER_UNIT*4, 1)) +DEF_PRIMITIVE_TYPE (BT_I8, builtin_type_for_size (BITS_PER_UNIT*8, 1)) + DEF_POINTER_TYPE (BT_PTR_CONST_STRING, BT_CONST_STRING) DEF_FUNCTION_TYPE_0 (BT_FN_VOID, BT_VOID) @@ -160,6 +170,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_STRING_CONST_STRING, BT_STRING, BT_CONST_STRING) DEF_FUNCTION_TYPE_1 (BT_FN_WORD_PTR, BT_WORD, BT_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_INT_WINT, BT_INT, BT_WINT) DEF_FUNCTION_TYPE_1 (BT_FN_WINT_WINT, BT_WINT, BT_WINT) +DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR) DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_INT, BT_VOID, BT_PTR, BT_INT) DEF_FUNCTION_TYPE_2 (BT_FN_STRING_STRING_CONST_STRING, @@ -241,6 +252,10 @@ DEF_FUNCTION_TYPE_2 (BT_FN_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOUBLE_COMPLEX_LONGDOU DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTR, BT_VOID, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_2 (BT_FN_INT_CONST_STRING_PTR_CONST_STRING, BT_INT, BT_CONST_STRING, BT_PTR_CONST_STRING) +DEF_FUNCTION_TYPE_2 (BT_FN_I1_VPTR_I1, BT_I1, BT_VOLATILE_PTR, BT_I1) +DEF_FUNCTION_TYPE_2 (BT_FN_I2_VPTR_I2, BT_I2, BT_VOLATILE_PTR, BT_I2) +DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_I4, BT_I4, BT_VOLATILE_PTR, BT_I4) +DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_I8, BT_I8, BT_VOLATILE_PTR, BT_I8) DEF_FUNCTION_TYPE_3 (BT_FN_STRING_STRING_CONST_STRING_SIZE, BT_STRING, BT_STRING, BT_CONST_STRING, BT_SIZE) @@ -285,6 +300,18 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_LONGDOUBLE_LONGDOUBLEPTR_LONGDOUBLEPTR, DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_PTR_PTR, BT_VOID, BT_PTR, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_3 (BT_FN_INT_CONST_STRING_PTR_CONST_STRING_PTR_CONST_STRING, BT_INT, BT_CONST_STRING, BT_PTR_CONST_STRING, BT_PTR_CONST_STRING) +DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_VPTR_I1_I1, BT_BOOL, BT_VOLATILE_PTR, + BT_I1, BT_I1) +DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_VPTR_I2_I2, BT_BOOL, BT_VOLATILE_PTR, + BT_I2, BT_I2) +DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_VPTR_I4_I4, BT_BOOL, BT_VOLATILE_PTR, + BT_I4, BT_I4) +DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_VPTR_I8_I8, BT_BOOL, BT_VOLATILE_PTR, + BT_I8, BT_I8) +DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_I1, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_I1) +DEF_FUNCTION_TYPE_3 
(BT_FN_I2_VPTR_I2_I2, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_I2) +DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_I4, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_I4) +DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_I8, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_I8) DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR, BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR) diff --git a/gcc/builtins.c b/gcc/builtins.c index b7ddb7474951..1a54a06854b2 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -48,9 +48,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "basic-block.h" #include "tree-mudflap.h" -#define CALLED_AS_BUILT_IN(NODE) \ - (!strncmp (IDENTIFIER_POINTER (DECL_NAME (NODE)), "__builtin_", 10)) - #ifndef PAD_VARARGS_DOWN #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN #endif @@ -191,6 +188,19 @@ static tree fold_builtin_strspn (tree); static tree fold_builtin_strcspn (tree); static tree fold_builtin_sprintf (tree, int); +/* Return true if NODE should be considered for inline expansion regardless + of the optimization level. This means whenever a function is invoked with + its "internal" name, which normally contains the prefix "__builtin". */ + +static bool called_as_built_in (tree node) +{ + const char *name = IDENTIFIER_POINTER (DECL_NAME (node)); + if (strncmp (name, "__builtin_", 10) == 0) + return true; + if (strncmp (name, "__sync_", 7) == 0) + return true; + return false; +} /* Return the alignment in bits of EXP, a pointer valued expression. But don't return more than MAX_ALIGN no matter what. @@ -5225,6 +5235,166 @@ expand_builtin_fork_or_exec (tree fn, tree arglist, rtx target, int ignore) return expand_call (call, target, ignore); } + + +/* Expand the __sync_xxx_and_fetch and __sync_fetch_and_xxx intrinsics. + ARGLIST is the operands list to the function. CODE is the rtx code + that corresponds to the arithmetic or logical operation from the name; + an exception here is that NOT actually means NAND. TARGET is an optional + place for us to store the results; AFTER is true if this is the + fetch_and_xxx form. IGNORE is true if we don't actually care about + the result of the operation at all. */ + +static rtx +expand_builtin_sync_operation (tree arglist, enum rtx_code code, bool after, + rtx target, bool ignore) +{ + enum machine_mode mode; + rtx addr, val, mem; + + /* Expand the operands. */ + addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_SUM); + mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_VALUE (arglist)))); + + arglist = TREE_CHAIN (arglist); + val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL); + + /* Note that we explicitly do not want any alias information for this + memory, so that we kill all other live memories. Otherwise we don't + satisfy the full barrier semantics of the intrinsic. */ + mem = validize_mem (gen_rtx_MEM (mode, addr)); + MEM_VOLATILE_P (mem) = 1; + + if (ignore) + return expand_sync_operation (mem, val, code); + else + return expand_sync_fetch_operation (mem, val, code, after, target); +} + +/* Expand the __sync_val_compare_and_swap and __sync_bool_compare_and_swap + intrinsics. ARGLIST is the operands list to the function. IS_BOOL is + true if this is the boolean form. TARGET is a place for us to store the + results; this is NOT optional if IS_BOOL is true. */ + +static rtx +expand_builtin_compare_and_swap (tree arglist, bool is_bool, rtx target) +{ + enum machine_mode mode; + rtx addr, old_val, new_val, mem; + + /* Expand the operands. 
*/ + addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_SUM); + mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_VALUE (arglist)))); + + arglist = TREE_CHAIN (arglist); + old_val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL); + + arglist = TREE_CHAIN (arglist); + new_val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL); + + /* Note that we explicitly do not want any alias information for this + memory, so that we kill all other live memories. Otherwise we don't + satisfy the full barrier semantics of the intrinsic. */ + mem = validize_mem (gen_rtx_MEM (mode, addr)); + MEM_VOLATILE_P (mem) = 1; + + if (is_bool) + return expand_bool_compare_and_swap (mem, old_val, new_val, target); + else + return expand_val_compare_and_swap (mem, old_val, new_val, target); +} + +/* Expand the __sync_lock_test_and_set intrinsic. Note that the most + general form is actually an atomic exchange, and some targets only + support a reduced form with the second argument being a constant 1. + ARGLIST is the operands list to the function; TARGET is an optional + place for us to store the results. */ + +static rtx +expand_builtin_lock_test_and_set (tree arglist, rtx target) +{ + enum machine_mode mode; + rtx addr, val, mem; + + /* Expand the operands. */ + addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_NORMAL); + mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_VALUE (arglist)))); + + arglist = TREE_CHAIN (arglist); + val = expand_expr (TREE_VALUE (arglist), NULL, mode, EXPAND_NORMAL); + + /* Note that we explicitly do not want any alias information for this + memory, so that we kill all other live memories. Otherwise we don't + satisfy the barrier semantics of the intrinsic. */ + mem = validize_mem (gen_rtx_MEM (mode, addr)); + MEM_VOLATILE_P (mem) = 1; + + return expand_sync_lock_test_and_set (mem, val, target); +} + +/* Expand the __sync_synchronize intrinsic. */ + +static void +expand_builtin_synchronize (void) +{ + rtx body; + +#ifdef HAVE_memory_barrier + if (HAVE_memory_barrier) + { + emit_insn (gen_memory_barrier ()); + return; + } +#endif + + /* If no explicit memory barrier instruction is available, create an empty + asm stmt that will prevent compiler movement across the barrier. */ + body = gen_rtx_ASM_INPUT (VOIDmode, ""); + MEM_VOLATILE_P (body) = 1; + emit_insn (body); +} + +/* Expand the __sync_lock_release intrinsic. ARGLIST is the operands list + to the function. */ + +static void +expand_builtin_lock_release (tree arglist) +{ + enum machine_mode mode; + enum insn_code icode; + rtx addr, val, mem, insn; + + /* Expand the operands. */ + addr = expand_expr (TREE_VALUE (arglist), NULL, Pmode, EXPAND_NORMAL); + mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_VALUE (arglist)))); + val = const0_rtx; + + /* Note that we explicitly do not want any alias information for this + memory, so that we kill all other live memories. Otherwise we don't + satisfy the barrier semantics of the intrinsic. */ + mem = validize_mem (gen_rtx_MEM (mode, addr)); + MEM_VOLATILE_P (mem) = 1; + + /* If there is an explicit operation in the md file, use it. */ + icode = sync_lock_release[mode]; + if (icode != CODE_FOR_nothing) + { + if (!insn_data[icode].operand[1].predicate (val, mode)) + val = force_reg (mode, val); + + insn = GEN_FCN (icode) (mem, val); + if (insn) + { + emit_insn (insn); + return; + } + } + + /* Otherwise we can implement this operation by emitting a barrier + followed by a store of zero. 
*/ + expand_builtin_synchronize (); + emit_move_insn (mem, val); +} /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient @@ -5247,7 +5417,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, /* When not optimizing, generate calls to library functions for a certain set of builtins. */ if (!optimize - && !CALLED_AS_BUILT_IN (fndecl) + && !called_as_built_in (fndecl) && DECL_ASSEMBLER_NAME_SET_P (fndecl) && fcode != BUILT_IN_ALLOCA) return expand_call (exp, target, ignore); @@ -5881,6 +6051,166 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode, return target; break; + case BUILT_IN_FETCH_AND_ADD_1: + case BUILT_IN_FETCH_AND_ADD_2: + case BUILT_IN_FETCH_AND_ADD_4: + case BUILT_IN_FETCH_AND_ADD_8: + target = expand_builtin_sync_operation (arglist, PLUS, + false, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_FETCH_AND_SUB_1: + case BUILT_IN_FETCH_AND_SUB_2: + case BUILT_IN_FETCH_AND_SUB_4: + case BUILT_IN_FETCH_AND_SUB_8: + target = expand_builtin_sync_operation (arglist, MINUS, + false, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_FETCH_AND_OR_1: + case BUILT_IN_FETCH_AND_OR_2: + case BUILT_IN_FETCH_AND_OR_4: + case BUILT_IN_FETCH_AND_OR_8: + target = expand_builtin_sync_operation (arglist, IOR, + false, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_FETCH_AND_AND_1: + case BUILT_IN_FETCH_AND_AND_2: + case BUILT_IN_FETCH_AND_AND_4: + case BUILT_IN_FETCH_AND_AND_8: + target = expand_builtin_sync_operation (arglist, AND, + false, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_FETCH_AND_XOR_1: + case BUILT_IN_FETCH_AND_XOR_2: + case BUILT_IN_FETCH_AND_XOR_4: + case BUILT_IN_FETCH_AND_XOR_8: + target = expand_builtin_sync_operation (arglist, XOR, + false, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_FETCH_AND_NAND_1: + case BUILT_IN_FETCH_AND_NAND_2: + case BUILT_IN_FETCH_AND_NAND_4: + case BUILT_IN_FETCH_AND_NAND_8: + target = expand_builtin_sync_operation (arglist, NOT, + false, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_ADD_AND_FETCH_1: + case BUILT_IN_ADD_AND_FETCH_2: + case BUILT_IN_ADD_AND_FETCH_4: + case BUILT_IN_ADD_AND_FETCH_8: + target = expand_builtin_sync_operation (arglist, PLUS, + true, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_SUB_AND_FETCH_1: + case BUILT_IN_SUB_AND_FETCH_2: + case BUILT_IN_SUB_AND_FETCH_4: + case BUILT_IN_SUB_AND_FETCH_8: + target = expand_builtin_sync_operation (arglist, MINUS, + true, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_OR_AND_FETCH_1: + case BUILT_IN_OR_AND_FETCH_2: + case BUILT_IN_OR_AND_FETCH_4: + case BUILT_IN_OR_AND_FETCH_8: + target = expand_builtin_sync_operation (arglist, IOR, + true, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_AND_AND_FETCH_1: + case BUILT_IN_AND_AND_FETCH_2: + case BUILT_IN_AND_AND_FETCH_4: + case BUILT_IN_AND_AND_FETCH_8: + target = expand_builtin_sync_operation (arglist, AND, + true, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_XOR_AND_FETCH_1: + case BUILT_IN_XOR_AND_FETCH_2: + case BUILT_IN_XOR_AND_FETCH_4: + case BUILT_IN_XOR_AND_FETCH_8: + target = expand_builtin_sync_operation (arglist, XOR, + true, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_NAND_AND_FETCH_1: + case 
BUILT_IN_NAND_AND_FETCH_2: + case BUILT_IN_NAND_AND_FETCH_4: + case BUILT_IN_NAND_AND_FETCH_8: + target = expand_builtin_sync_operation (arglist, NOT, + true, target, ignore); + if (target) + return target; + break; + + case BUILT_IN_BOOL_COMPARE_AND_SWAP_1: + case BUILT_IN_BOOL_COMPARE_AND_SWAP_2: + case BUILT_IN_BOOL_COMPARE_AND_SWAP_4: + case BUILT_IN_BOOL_COMPARE_AND_SWAP_8: + if (!target || !register_operand (target, mode)) + target = gen_reg_rtx (mode); + target = expand_builtin_compare_and_swap (arglist, true, target); + if (target) + return target; + break; + + case BUILT_IN_VAL_COMPARE_AND_SWAP_1: + case BUILT_IN_VAL_COMPARE_AND_SWAP_2: + case BUILT_IN_VAL_COMPARE_AND_SWAP_4: + case BUILT_IN_VAL_COMPARE_AND_SWAP_8: + target = expand_builtin_compare_and_swap (arglist, false, target); + if (target) + return target; + break; + + case BUILT_IN_LOCK_TEST_AND_SET_1: + case BUILT_IN_LOCK_TEST_AND_SET_2: + case BUILT_IN_LOCK_TEST_AND_SET_4: + case BUILT_IN_LOCK_TEST_AND_SET_8: + target = expand_builtin_lock_test_and_set (arglist, target); + if (target) + return target; + break; + + case BUILT_IN_LOCK_RELEASE_1: + case BUILT_IN_LOCK_RELEASE_2: + case BUILT_IN_LOCK_RELEASE_4: + case BUILT_IN_LOCK_RELEASE_8: + expand_builtin_lock_release (arglist); + return const0_rtx; + + case BUILT_IN_SYNCHRONIZE: + expand_builtin_synchronize (); + return const0_rtx; + default: /* just do library call, if unknown builtin */ break; } diff --git a/gcc/builtins.def b/gcc/builtins.def index 51d87ca362e4..134bf98a1290 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -71,6 +71,12 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, BT_LAST, \ false, false, false, ATTRS, true, true) +/* Like DEF_GCC_BUILTIN, except we don't prepend "__builtin_". */ +#undef DEF_SYNC_BUILTIN +#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, NAME, BUILT_IN_NORMAL, TYPE, BT_LAST, \ + false, false, false, ATTRS, true, true) + /* A library builtin (like __builtin_strchr) is a builtin equivalent of an ANSI/ISO standard library function. In addition to the `__builtin' version, we will create an ordinary version (e.g, @@ -657,3 +663,196 @@ DEF_BUILTIN_STUB (BUILT_IN_STACK_RESTORE, "__builtin_stack_restore") /* Profiling hooks. */ DEF_BUILTIN_STUB (BUILT_IN_PROFILE_FUNC_ENTER, "profile_func_enter") DEF_BUILTIN_STUB (BUILT_IN_PROFILE_FUNC_EXIT, "profile_func_exit") + +/* Synchronization Primitives. The "_N" version is the one that the user + is supposed to be using. It's overloaded, and is resolved to one of the + "_1" through "_8" versions, plus some extra casts. 
*/ + +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_ADD_N, "__sync_fetch_and_add", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_ADD_1, "__sync_fetch_and_add_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_ADD_2, "__sync_fetch_and_add_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_ADD_4, "__sync_fetch_and_add_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_ADD_8, "__sync_fetch_and_add_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_SUB_N, "__sync_fetch_and_sub", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_SUB_1, "__sync_fetch_and_sub_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_SUB_2, "__sync_fetch_and_sub_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_SUB_4, "__sync_fetch_and_sub_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_SUB_8, "__sync_fetch_and_sub_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_OR_N, "__sync_fetch_and_or", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_OR_1, "__sync_fetch_and_or_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_OR_2, "__sync_fetch_and_or_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_OR_4, "__sync_fetch_and_or_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_OR_8, "__sync_fetch_and_or_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_AND_N, "__sync_fetch_and_and", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_AND_1, "__sync_fetch_and_and_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_AND_2, "__sync_fetch_and_and_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_AND_4, "__sync_fetch_and_and_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_AND_8, "__sync_fetch_and_and_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_XOR_N, "__sync_fetch_and_xor", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_XOR_1, "__sync_fetch_and_xor_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_XOR_2, "__sync_fetch_and_xor_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_XOR_4, "__sync_fetch_and_xor_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_XOR_8, "__sync_fetch_and_xor_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_NAND_N, "__sync_fetch_and_nand", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_NAND_1, "__sync_fetch_and_nand_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_NAND_2, "__sync_fetch_and_nand_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_NAND_4, "__sync_fetch_and_nand_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_FETCH_AND_NAND_8, "__sync_fetch_and_nand_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_ADD_AND_FETCH_N, "__sync_add_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ADD_AND_FETCH_1, "__sync_add_and_fetch_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ADD_AND_FETCH_2, 
"__sync_add_and_fetch_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ADD_AND_FETCH_4, "__sync_add_and_fetch_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_ADD_AND_FETCH_8, "__sync_add_and_fetch_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_SUB_AND_FETCH_N, "__sync_sub_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_SUB_AND_FETCH_1, "__sync_sub_and_fetch_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_SUB_AND_FETCH_2, "__sync_sub_and_fetch_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_SUB_AND_FETCH_4, "__sync_sub_and_fetch_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_SUB_AND_FETCH_8, "__sync_sub_and_fetch_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_OR_AND_FETCH_N, "__sync_or_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_OR_AND_FETCH_1, "__sync_or_and_fetch_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_OR_AND_FETCH_2, "__sync_or_and_fetch_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_OR_AND_FETCH_4, "__sync_or_and_fetch_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_OR_AND_FETCH_8, "__sync_or_and_fetch_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_AND_AND_FETCH_N, "__sync_and_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_AND_AND_FETCH_1, "__sync_and_and_fetch_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_AND_AND_FETCH_2, "__sync_and_and_fetch_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_AND_AND_FETCH_4, "__sync_and_and_fetch_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_AND_AND_FETCH_8, "__sync_and_and_fetch_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_XOR_AND_FETCH_N, "__sync_xor_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_XOR_AND_FETCH_1, "__sync_xor_and_fetch_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_XOR_AND_FETCH_2, "__sync_xor_and_fetch_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_XOR_AND_FETCH_4, "__sync_xor_and_fetch_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_XOR_AND_FETCH_8, "__sync_xor_and_fetch_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_NAND_AND_FETCH_N, "__sync_nand_and_fetch", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_NAND_AND_FETCH_1, "__sync_nand_and_fetch_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_NAND_AND_FETCH_2, "__sync_nand_and_fetch_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_NAND_AND_FETCH_4, "__sync_nand_and_fetch_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_NAND_AND_FETCH_8, "__sync_nand_and_fetch_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_BOOL_COMPARE_AND_SWAP_N, + "__sync_bool_compare_and_swap", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_BOOL_COMPARE_AND_SWAP_1, + "__sync_bool_compare_and_swap_1", + BT_FN_BOOL_VPTR_I1_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_BOOL_COMPARE_AND_SWAP_2, + "__sync_bool_compare_and_swap_2", + BT_FN_BOOL_VPTR_I2_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_BOOL_COMPARE_AND_SWAP_4, + "__sync_bool_compare_and_swap_4", + BT_FN_BOOL_VPTR_I4_I4, ATTR_NOTHROW_LIST) 
+DEF_SYNC_BUILTIN (BUILT_IN_BOOL_COMPARE_AND_SWAP_8, + "__sync_bool_compare_and_swap_8", + BT_FN_BOOL_VPTR_I8_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_VAL_COMPARE_AND_SWAP_N, + "__sync_val_compare_and_swap", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_VAL_COMPARE_AND_SWAP_1, + "__sync_val_compare_and_swap_1", + BT_FN_I1_VPTR_I1_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_VAL_COMPARE_AND_SWAP_2, + "__sync_val_compare_and_swap_2", + BT_FN_I2_VPTR_I2_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_VAL_COMPARE_AND_SWAP_4, + "__sync_val_compare_and_swap_4", + BT_FN_I4_VPTR_I4_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_VAL_COMPARE_AND_SWAP_8, + "__sync_val_compare_and_swap_8", + BT_FN_I8_VPTR_I8_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND_SET_N, "__sync_lock_test_and_set", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND_SET_1, "__sync_lock_test_and_set_1", + BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND_SET_2, "__sync_lock_test_and_set_2", + BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND_SET_4, "__sync_lock_test_and_set_4", + BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND_SET_8, "__sync_lock_test_and_set_8", + BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_N, "__sync_lock_release", + BT_FN_VOID_VAR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_1, "__sync_lock_release_1", + BT_FN_VOID_VPTR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_2, "__sync_lock_release_2", + BT_FN_VOID_VPTR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_4, "__sync_lock_release_4", + BT_FN_VOID_VPTR, ATTR_NOTHROW_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_8, "__sync_lock_release_8", + BT_FN_VOID_VPTR, ATTR_NOTHROW_LIST) + +DEF_SYNC_BUILTIN (BUILT_IN_SYNCHRONIZE, "__sync_synchronize", + BT_FN_VOID, ATTR_NOTHROW_LIST) diff --git a/gcc/c-common.c b/gcc/c-common.c index 050a1eb97914..da6be4e21a6a 100644 --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -3242,8 +3242,9 @@ c_common_nodes_and_builtins (void) { \ tree decl; \ \ - gcc_assert (!strncmp (NAME, "__builtin_", \ - strlen ("__builtin_"))); \ + gcc_assert ((!BOTH_P && !FALLBACK_P) \ + || !strncmp (NAME, "__builtin_", \ + strlen ("__builtin_"))); \ \ if (!BOTH_P) \ decl = lang_hooks.builtin_function (NAME, builtin_types[TYPE], \ @@ -5836,4 +5837,170 @@ complete_array_type (tree *ptype, tree initial_value, bool do_default) return failure; } + +/* Used to help initialize the builtin-types.def table. When a type of + the correct size doesn't exist, use error_mark_node instead of NULL. + The latter results in segfaults even when a decl using the type doesn't + get invoked. */ + +tree +builtin_type_for_size (int size, bool unsignedp) +{ + tree type = lang_hooks.types.type_for_size (size, unsignedp); + return type ? type : error_mark_node; +} + +/* A helper function for resolve_overloaded_builtin in resolving the + overloaded __sync_ builtins. Returns a positive power of 2 if the + first operand of PARAMS is a pointer to a supported data type. + Returns 0 if an error is encountered. 
*/ + +static int +sync_resolve_size (tree function, tree params) +{ + tree type; + int size; + + if (params == NULL) + { + error ("too few arguments to function %qE", function); + return 0; + } + + type = TREE_TYPE (TREE_VALUE (params)); + if (TREE_CODE (type) != POINTER_TYPE) + goto incompatible; + + type = TREE_TYPE (type); + if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) + goto incompatible; + + size = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + if (size == 1 || size == 2 || size == 4 || size == 8) + return size; + + incompatible: + error ("incompatible type for argument %d of %qE", 1, function); + return 0; +} + +/* A helper function for resolve_overloaded_builtin. Adds casts to + PARAMS to make arguments match up with those of FUNCTION. Drops + the variadic arguments at the end. Returns false if some error + was encountered; true on success. */ + +static bool +sync_resolve_params (tree orig_function, tree function, tree params) +{ + tree arg_types = TYPE_ARG_TYPES (TREE_TYPE (function)); + tree ptype; + int number; + + /* We've declared the implementation functions to use "volatile void *" + as the pointer parameter, so we shouldn't get any complaints from the + call to check_function_arguments whatever type the user used. */ + arg_types = TREE_CHAIN (arg_types); + ptype = TREE_TYPE (TREE_TYPE (TREE_VALUE (params))); + number = 2; + + /* For the rest of the values, we need to cast these to FTYPE, so that we + don't get warnings for passing pointer types, etc. */ + while (arg_types != void_list_node) + { + tree val; + + params = TREE_CHAIN (params); + if (params == NULL) + { + error ("too few arguments to function %qE", orig_function); + return false; + } + + /* ??? Ideally for the first conversion we'd use convert_for_assignment + so that we get warnings for anything that doesn't match the pointer + type. This isn't portable across the C and C++ front ends atm. */ + val = TREE_VALUE (params); + val = convert (ptype, val); + val = convert (TREE_VALUE (arg_types), val); + TREE_VALUE (params) = val; + + arg_types = TREE_CHAIN (arg_types); + number++; + } + + /* The definition of these primitives is variadic, with the remaining + arguments being "an optional list of variables protected by the memory + barrier". No clue what that's supposed to mean, precisely, but we + consider all call-clobbered variables to be protected so we're safe. */ + TREE_CHAIN (params) = NULL; + + return true; +} + +/* A helper function for resolve_overloaded_builtin. Adds a cast to + RESULT to make it match the type of the first pointer argument in + PARAMS. */ + +static tree +sync_resolve_return (tree params, tree result) +{ + tree ptype = TREE_TYPE (TREE_TYPE (TREE_VALUE (params))); + return convert (ptype, result); +} + +/* Some builtin functions are placeholders for other expressions. This + function should be called immediately after parsing the call expression + before surrounding code has committed to the type of the expression. + + FUNCTION is the DECL that has been invoked; it is known to be a builtin. + PARAMS is the argument list for the call. The return value is non-null + when expansion is complete, and null if normal processing should + continue. 
*/ + +tree +resolve_overloaded_builtin (tree function, tree params) +{ + enum built_in_function orig_code = DECL_FUNCTION_CODE (function); + switch (orig_code) + { + case BUILT_IN_FETCH_AND_ADD_N: + case BUILT_IN_FETCH_AND_SUB_N: + case BUILT_IN_FETCH_AND_OR_N: + case BUILT_IN_FETCH_AND_AND_N: + case BUILT_IN_FETCH_AND_XOR_N: + case BUILT_IN_FETCH_AND_NAND_N: + case BUILT_IN_ADD_AND_FETCH_N: + case BUILT_IN_SUB_AND_FETCH_N: + case BUILT_IN_OR_AND_FETCH_N: + case BUILT_IN_AND_AND_FETCH_N: + case BUILT_IN_XOR_AND_FETCH_N: + case BUILT_IN_NAND_AND_FETCH_N: + case BUILT_IN_BOOL_COMPARE_AND_SWAP_N: + case BUILT_IN_VAL_COMPARE_AND_SWAP_N: + case BUILT_IN_LOCK_TEST_AND_SET_N: + case BUILT_IN_LOCK_RELEASE_N: + { + int n = sync_resolve_size (function, params); + tree new_function, result; + + if (n == 0) + return error_mark_node; + + new_function = built_in_decls[orig_code + exact_log2 (n) + 1]; + if (!sync_resolve_params (function, new_function, params)) + return error_mark_node; + + result = build_function_call (new_function, params); + if (orig_code != BUILT_IN_BOOL_COMPARE_AND_SWAP_N + && orig_code != BUILT_IN_LOCK_RELEASE_N) + result = sync_resolve_return (params, result); + + return result; + } + + default: + return NULL; + } +} + #include "gt-c-common.h" diff --git a/gcc/c-common.h b/gcc/c-common.h index d2b9a4a9cbdb..cf6e88b9fa65 100644 --- a/gcc/c-common.h +++ b/gcc/c-common.h @@ -798,6 +798,8 @@ extern void c_do_switch_warnings (splay_tree, location_t, tree, tree); extern tree build_function_call (tree, tree); +extern tree resolve_overloaded_builtin (tree, tree); + extern tree finish_label_address_expr (tree); /* Same function prototype, but the C and C++ front ends have @@ -860,6 +862,8 @@ extern void lvalue_error (enum lvalue_use); extern int complete_array_type (tree *, tree, bool); +extern tree builtin_type_for_size (int, bool); + /* In c-gimplify.c */ extern void c_genericize (tree); extern int c_gimplify_expr (tree *, tree *, tree *); diff --git a/gcc/c-typeck.c b/gcc/c-typeck.c index 8c3996613e30..9c82bbe60195 100644 --- a/gcc/c-typeck.c +++ b/gcc/c-typeck.c @@ -1978,6 +1978,13 @@ build_function_call (tree function, tree params) /* Convert anything with function type to a pointer-to-function. */ if (TREE_CODE (function) == FUNCTION_DECL) { + if (DECL_BUILT_IN_CLASS (function) == BUILT_IN_NORMAL) + { + tem = resolve_overloaded_builtin (function, params); + if (tem) + return tem; + } + name = DECL_NAME (function); /* Differs from default_conversion by not setting TREE_ADDRESSABLE diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 23b046d1bd1f..5fc85a45abbb 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -70,6 +70,7 @@ extensions, accepted by GCC in C89 mode and in C++. * Return Address:: Getting the return or frame address of a function. * Vector Extensions:: Using vector instructions through built-in functions. * Offsetof:: Special syntax for implementing @code{offsetof}. +* Atomic Builtins:: Built-in functions for atomic memory access. * Other Builtins:: Other built-in functions. * Target Builtins:: Built-in functions specific to particular targets. * Target Format Checks:: Format checks specific to particular targets. @@ -4581,6 +4582,133 @@ is a suitable definition of the @code{offsetof} macro. In C++, @var{type} may be dependent. In either case, @var{member} may consist of a single identifier, or a sequence of member accesses and array references. 
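As a concrete illustration of the overload resolution implemented above (resolve_overloaded_builtin dispatching through sync_resolve_size), here is a minimal user-level sketch; the variable and function names are hypothetical, and it assumes a typical target where int is 4 bytes and long long is 8 bytes:

    /* Each call below is rewritten to the size-suffixed variant chosen by
       sync_resolve_size from the size of the pointed-to type.  Assumes a
       target where int is 4 bytes and long long is 8 bytes.  */
    int counter32;
    long long counter64;

    void
    bump_counters (void)
    {
      __sync_fetch_and_add (&counter32, 1);   /* becomes __sync_fetch_and_add_4 */
      __sync_fetch_and_add (&counter64, 1LL); /* becomes __sync_fetch_and_add_8 */
    }
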
+@node Atomic Builtins +@section Built-in functions for atomic memory access + +The following builtins are intended to be compatible with those described +in the @cite{Intel Itanium Processor-specific Application Binary Interface}, +section 7.4. As such, they depart from the normal GCC practice of using +the ``__builtin_'' prefix, and further they are overloaded such that +they work on multiple types. + +The definition given in the Intel documentation allows only for the use of +the types @code{int}, @code{long}, and @code{long long}, as well as their +unsigned counterparts. GCC will allow any integral scalar or pointer type +that is 1, 2, 4 or 8 bytes in length. + +Not all operations are supported by all target processors. If a particular +operation cannot be implemented on the target processor, a warning will be +generated and a call to an external function will be emitted. The external +function will carry the same name as the builtin, with an additional suffix +@samp{_@var{n}} where @var{n} is the size of the data type. + +@c ??? Should we have a mechanism to suppress this warning? This is almost +@c useful for implementing the operation under the control of an external +@c mutex. + +In most cases, these builtins are considered a @dfn{full barrier}. That is, +no memory operand will be moved across the operation, either forward or +backward. Further, instructions will be issued as necessary to prevent the +processor from speculating loads across the operation and from queuing stores +after the operation. + +All of the routines are described in the Intel documentation to take +``an optional list of variables protected by the memory barrier''. It's +not clear what is meant by that; it could mean that @emph{only} the +following variables are protected, or it could mean that these variables +should in addition be protected. At present GCC ignores this list and +protects all variables which are globally accessible. If in the future +we make some use of this list, an empty list will continue to mean all +globally accessible variables. + +@table @code +@item @var{type} __sync_fetch_and_add (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_fetch_and_sub (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_fetch_and_or (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_fetch_and_and (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_fetch_and_xor (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_fetch_and_nand (@var{type} *ptr, @var{type} value, ...) +@findex __sync_fetch_and_add +@findex __sync_fetch_and_sub +@findex __sync_fetch_and_or +@findex __sync_fetch_and_and +@findex __sync_fetch_and_xor +@findex __sync_fetch_and_nand +These builtins perform the operation suggested by the name, and +return the value that had previously been in memory. That is, + +@smallexample +@{ tmp = *ptr; *ptr @var{op}= value; return tmp; @} +@end smallexample + +The builtin @code{__sync_fetch_and_nand} could be implemented by +@code{__sync_fetch_and_and(ptr, ~value)}. + +@item @var{type} __sync_add_and_fetch (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_sub_and_fetch (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_or_and_fetch (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_and_and_fetch (@var{type} *ptr, @var{type} value, ...) +@itemx @var{type} __sync_xor_and_fetch (@var{type} *ptr, @var{type} value, ...) 
+@itemx @var{type} __sync_nand_and_fetch (@var{type} *ptr, @var{type} value, ...) +@findex __sync_add_and_fetch +@findex __sync_sub_and_fetch +@findex __sync_or_and_fetch +@findex __sync_and_and_fetch +@findex __sync_xor_and_fetch +@findex __sync_nand_and_fetch +These builtins perform the operation suggested by the name, and +return the new value. That is, + +@smallexample +@{ *ptr @var{op}= value; return *ptr; @} +@end smallexample + +@item bool __sync_bool_compare_and_swap (@var{type} *ptr, @var{type} oldval, @var{type} newval, ...) +@itemx @var{type} __sync_val_compare_and_swap (@var{type} *ptr, @var{type} oldval, @var{type} newval, ...) +@findex __sync_bool_compare_and_swap +@findex __sync_val_compare_and_swap +These builtins perform an atomic compare and swap. That is, if the current +value of @code{*@var{ptr}} is @var{oldval}, then write @var{newval} into +@code{*@var{ptr}}. + +The ``bool'' version returns true if the comparison is successful and +@var{newval} was written. The ``val'' version returns the contents +of @code{*@var{ptr}} after the operation. + +@item __sync_synchronize (...) +@findex __sync_synchronize +This builtin issues a full memory barrier. + +@item @var{type} __sync_lock_test_and_set (@var{type} *ptr, @var{type} value, ...) +@findex __sync_lock_test_and_set +This builtin, as described by Intel, is not a traditional test-and-set +operation, but rather an atomic exchange operation. It writes @var{value} +into @code{*@var{ptr}}, and returns the previous contents of +@code{*@var{ptr}}. + +Many targets have only minimal support for such locks, and do not support +a full exchange operation. In this case, a target may support reduced +functionality here by which the @emph{only} valid value to store is the +immediate constant 1. The exact value actually stored in @code{*@var{ptr}} +is implementation defined. + +This builtin is not a full barrier, but rather an @dfn{acquire barrier}. +This means that references after the builtin cannot move to (or be +speculated to) before the builtin, but previous memory stores may not +be globally visible yet, and previous memory loads may not yet be +satisfied. + +@item void __sync_lock_release (@var{type} *ptr, ...) +@findex __sync_lock_release +This builtin releases the lock acquired by @code{__sync_lock_test_and_set}. +Normally this means writing the constant 0 to @code{*@var{ptr}}. + +This builtin is not a full barrier, but rather a @dfn{release barrier}. +This means that all previous memory stores are globally visible, and all +previous memory loads have been satisfied, but following memory reads +are not prevented from being speculated to before the barrier. +@end table + @node Other Builtins @section Other built-in functions provided by GCC @cindex built-in functions diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 460023091f4e..4deacfb67d7c 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3936,6 +3936,140 @@ respectively, a low or moderate degree of temporal locality. Targets that do not support write prefetches or locality hints can ignore the values of operands 1 and 2. +@cindex @code{memory_barrier} instruction pattern +@item @samp{memory_barrier} + +If the target memory model is not fully synchronous, then this pattern +should be defined to an instruction that orders both loads and stores +before the instruction with respect to loads and stores after the instruction. +This pattern has no operands. 
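To connect the builtins documented in extend.texi above with these patterns, here is a minimal spinlock sketch; the names are hypothetical, and because the only value ever stored is the constant 1, it works even on targets that provide just the reduced test-and-set form described above:

    /* Hypothetical spinlock built from the acquire/release pair.  */
    static volatile int lock;

    void
    spin_acquire (void)
    {
      /* Acquire barrier: spin until the previous contents were 0.  */
      while (__sync_lock_test_and_set (&lock, 1))
        continue;
    }

    void
    spin_release (void)
    {
      /* Release barrier followed by a store of 0.  */
      __sync_lock_release (&lock);
    }
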
+ +@cindex @code{sync_compare_and_swap@var{mode}} instruction pattern +@item @samp{sync_compare_and_swap@var{mode}} + +This pattern, if defined, emits code for an atomic compare-and-swap +operation. Operand 1 is the memory on which the atomic operation is +performed. Operand 2 is the ``old'' value to be compared against the +current contents of the memory location. Operand 3 is the ``new'' value +to store in the memory if the compare succeeds. Operand 0 is the result +of the operation; it should contain the current contents of the memory +after the operation. If the compare succeeds, this should obviously be +a copy of operand 3. + +This pattern must show that both operand 0 and operand 1 are modified. + +This pattern must issue any memory barrier instructions such that the +pattern as a whole acts as a full barrier. + +@cindex @code{sync_compare_and_swap_cc@var{mode}} instruction pattern +@item @samp{sync_compare_and_swap_cc@var{mode}} + +This pattern is just like @code{sync_compare_and_swap@var{mode}}, except +it should act as if the compare part of the compare-and-swap were issued via +@code{cmp@var{m}}. This comparison will only be used with @code{EQ} and +@code{NE} branches and @code{setcc} operations. + +Some targets do expose the success or failure of the compare-and-swap +operation via the status flags. Ideally we wouldn't need a separate +named pattern in order to take advantage of this, but the combine pass +does not handle patterns with multiple sets, which is required by +definition for @code{sync_compare_and_swap@var{mode}}. + +@cindex @code{sync_add@var{mode}} instruction pattern +@cindex @code{sync_sub@var{mode}} instruction pattern +@cindex @code{sync_ior@var{mode}} instruction pattern +@cindex @code{sync_and@var{mode}} instruction pattern +@cindex @code{sync_xor@var{mode}} instruction pattern +@cindex @code{sync_nand@var{mode}} instruction pattern +@item @samp{sync_add@var{mode}}, @samp{sync_sub@var{mode}} +@itemx @samp{sync_ior@var{mode}}, @samp{sync_and@var{mode}} +@itemx @samp{sync_xor@var{mode}}, @samp{sync_nand@var{mode}} + +These patterns emit code for an atomic operation on memory. +Operand 0 is the memory on which the atomic operation is performed. +Operand 1 is the second operand to the binary operator. + +The ``nand'' operation is @code{op0 & ~op1}. + +This pattern must issue any memory barrier instructions such that the +pattern as a whole acts as a full barrier. + +If these patterns are not defined, the operation will be constructed +from a compare-and-swap operation, if defined. + +@cindex @code{sync_old_add@var{mode}} instruction pattern +@cindex @code{sync_old_sub@var{mode}} instruction pattern +@cindex @code{sync_old_ior@var{mode}} instruction pattern +@cindex @code{sync_old_and@var{mode}} instruction pattern +@cindex @code{sync_old_xor@var{mode}} instruction pattern +@cindex @code{sync_old_nand@var{mode}} instruction pattern +@item @samp{sync_old_add@var{mode}}, @samp{sync_old_sub@var{mode}} +@itemx @samp{sync_old_ior@var{mode}}, @samp{sync_old_and@var{mode}} +@itemx @samp{sync_old_xor@var{mode}}, @samp{sync_old_nand@var{mode}} + +These patterns emit code for an atomic operation on memory, +and return the value that the memory contained before the operation. +Operand 0 is the result value, operand 1 is the memory on which the +atomic operation is performed, and operand 2 is the second operand +to the binary operator. + +This pattern must issue any memory barrier instructions such that the +pattern as a whole acts as a full barrier. 
+ +If these patterns are not defined, the operation will be constructed +from a compare-and-swap operation, if defined. + +@cindex @code{sync_new_add@var{mode}} instruction pattern +@cindex @code{sync_new_sub@var{mode}} instruction pattern +@cindex @code{sync_new_ior@var{mode}} instruction pattern +@cindex @code{sync_new_and@var{mode}} instruction pattern +@cindex @code{sync_new_xor@var{mode}} instruction pattern +@cindex @code{sync_new_nand@var{mode}} instruction pattern +@item @samp{sync_new_add@var{mode}}, @samp{sync_new_sub@var{mode}} +@itemx @samp{sync_new_ior@var{mode}}, @samp{sync_new_and@var{mode}} +@itemx @samp{sync_new_xor@var{mode}}, @samp{sync_new_nand@var{mode}} + +These patterns are like their @code{sync_old_@var{op}} counterparts, +except that they return the value that exists in the memory location +after the operation, rather than before the operation. + +@cindex @code{sync_lock_test_and_set@var{mode}} instruction pattern +@item @samp{sync_lock_test_and_set@var{mode}} + +This pattern takes two forms, based on the capabilities of the target. +In either case, operand 0 is the result of the operation, operand 1 is +the memory on which the atomic operation is performed, and operand 2 +is the value to set in the lock. + +In the ideal case, this operation is an atomic exchange operation, in +which the previous value in the memory operand is copied into the result +operand, and the value operand is stored in the memory operand. + +For less capable targets, any value operand that is not the constant 1 +should be rejected with @code{FAIL}. In this case the target may use +an atomic test-and-set bit operation. The result operand should contain +1 if the bit was previously set and 0 if the bit was previously clear. +The true contents of the memory operand are implementation defined. + +This pattern must issue any memory barrier instructions such that the +pattern as a whole acts as an acquire barrier. + +If this pattern is not defined, the operation will be constructed from +a compare-and-swap operation, if defined. + +@cindex @code{sync_lock_release@var{mode}} instruction pattern +@item @samp{sync_lock_release@var{mode}} + +This pattern, if defined, releases a lock set by +@code{sync_lock_test_and_set@var{mode}}. Operand 0 is the memory +that contains the lock. + +This pattern must issue any memory barrier instructions such that the +pattern as a whole acts as a release barrier. + +If this pattern is not defined, then a @code{memory_barrier} pattern +will be emitted, followed by a store of zero to the memory operand. + @end table @end ifset diff --git a/gcc/expr.c b/gcc/expr.c index c2a6e7db9a79..b334453efd59 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -208,6 +208,30 @@ enum insn_code clrmem_optab[NUM_MACHINE_MODES]; enum insn_code cmpstr_optab[NUM_MACHINE_MODES]; enum insn_code cmpmem_optab[NUM_MACHINE_MODES]; +/* Synchronization primitives. 
diff --git a/gcc/expr.c b/gcc/expr.c
index c2a6e7db9a79..b334453efd59 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -208,6 +208,30 @@ enum insn_code clrmem_optab[NUM_MACHINE_MODES];
 enum insn_code cmpstr_optab[NUM_MACHINE_MODES];
 enum insn_code cmpmem_optab[NUM_MACHINE_MODES];
 
+/* Synchronization primitives.  */
+enum insn_code sync_add_optab[NUM_MACHINE_MODES];
+enum insn_code sync_sub_optab[NUM_MACHINE_MODES];
+enum insn_code sync_ior_optab[NUM_MACHINE_MODES];
+enum insn_code sync_and_optab[NUM_MACHINE_MODES];
+enum insn_code sync_xor_optab[NUM_MACHINE_MODES];
+enum insn_code sync_nand_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_add_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_sub_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_ior_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_and_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_xor_optab[NUM_MACHINE_MODES];
+enum insn_code sync_old_nand_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_add_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_sub_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_ior_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_and_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_xor_optab[NUM_MACHINE_MODES];
+enum insn_code sync_new_nand_optab[NUM_MACHINE_MODES];
+enum insn_code sync_compare_and_swap[NUM_MACHINE_MODES];
+enum insn_code sync_compare_and_swap_cc[NUM_MACHINE_MODES];
+enum insn_code sync_lock_test_and_set[NUM_MACHINE_MODES];
+enum insn_code sync_lock_release[NUM_MACHINE_MODES];
+
 /* SLOW_UNALIGNED_ACCESS is nonzero if unaligned accesses are very
    slow.  */
 #ifndef SLOW_UNALIGNED_ACCESS
diff --git a/gcc/expr.h b/gcc/expr.h
index b624413e080f..f462dc84b472 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -310,6 +310,11 @@ int can_conditionally_move_p (enum machine_mode mode);
 rtx emit_conditional_add (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
                           rtx, rtx, enum machine_mode, int);
 
+rtx expand_val_compare_and_swap (rtx, rtx, rtx, rtx);
+rtx expand_bool_compare_and_swap (rtx, rtx, rtx, rtx);
+rtx expand_sync_operation (rtx, rtx, enum rtx_code);
+rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
+rtx expand_sync_lock_test_and_set (rtx, rtx, rtx);
+
 /* Functions from expmed.c:  */
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index da80f782789a..d58f88118123 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -171,12 +171,35 @@ static const char * const optabs[] =
   "clrmem_optab[$A] = CODE_FOR_$(clrmem$a$)",
   "cmpstr_optab[$A] = CODE_FOR_$(cmpstr$a$)",
   "cmpmem_optab[$A] = CODE_FOR_$(cmpmem$a$)",
+  "sync_add_optab[$A] = CODE_FOR_$(sync_add$I$a$)",
+  "sync_sub_optab[$A] = CODE_FOR_$(sync_sub$I$a$)",
+  "sync_ior_optab[$A] = CODE_FOR_$(sync_ior$I$a$)",
+  "sync_and_optab[$A] = CODE_FOR_$(sync_and$I$a$)",
+  "sync_xor_optab[$A] = CODE_FOR_$(sync_xor$I$a$)",
+  "sync_nand_optab[$A] = CODE_FOR_$(sync_nand$I$a$)",
+  "sync_old_add_optab[$A] = CODE_FOR_$(sync_old_add$I$a$)",
+  "sync_old_sub_optab[$A] = CODE_FOR_$(sync_old_sub$I$a$)",
+  "sync_old_ior_optab[$A] = CODE_FOR_$(sync_old_ior$I$a$)",
+  "sync_old_and_optab[$A] = CODE_FOR_$(sync_old_and$I$a$)",
+  "sync_old_xor_optab[$A] = CODE_FOR_$(sync_old_xor$I$a$)",
+  "sync_old_nand_optab[$A] = CODE_FOR_$(sync_old_nand$I$a$)",
+  "sync_new_add_optab[$A] = CODE_FOR_$(sync_new_add$I$a$)",
+  "sync_new_sub_optab[$A] = CODE_FOR_$(sync_new_sub$I$a$)",
+  "sync_new_ior_optab[$A] = CODE_FOR_$(sync_new_ior$I$a$)",
+  "sync_new_and_optab[$A] = CODE_FOR_$(sync_new_and$I$a$)",
+  "sync_new_xor_optab[$A] = CODE_FOR_$(sync_new_xor$I$a$)",
+  "sync_new_nand_optab[$A] = CODE_FOR_$(sync_new_nand$I$a$)",
+  "sync_compare_and_swap[$A] = CODE_FOR_$(sync_compare_and_swap$I$a$)",
+  "sync_compare_and_swap_cc[$A] = CODE_FOR_$(sync_compare_and_swap_cc$I$a$)",
+  "sync_lock_test_and_set[$A] = CODE_FOR_$(sync_lock_test_and_set$I$a$)",
+  "sync_lock_release[$A] = CODE_FOR_$(sync_lock_release$I$a$)",
CODE_FOR_$(sync_lock_release$I$a$)", "vec_set_optab->handlers[$A].insn_code = CODE_FOR_$(vec_set$a$)", "vec_extract_optab->handlers[$A].insn_code = CODE_FOR_$(vec_extract$a$)", "vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)", "vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)", "vcond_gen_code[$A] = CODE_FOR_$(vcond$a$)", - "vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)" }; + "vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)" +}; static void gen_insn (rtx); diff --git a/gcc/optabs.c b/gcc/optabs.c index 6835a9a76657..47eec4bbcf3b 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -5093,6 +5093,29 @@ init_optabs (void) cmpstr_optab[i] = CODE_FOR_nothing; cmpmem_optab[i] = CODE_FOR_nothing; + sync_add_optab[i] = CODE_FOR_nothing; + sync_sub_optab[i] = CODE_FOR_nothing; + sync_ior_optab[i] = CODE_FOR_nothing; + sync_and_optab[i] = CODE_FOR_nothing; + sync_xor_optab[i] = CODE_FOR_nothing; + sync_nand_optab[i] = CODE_FOR_nothing; + sync_old_add_optab[i] = CODE_FOR_nothing; + sync_old_sub_optab[i] = CODE_FOR_nothing; + sync_old_ior_optab[i] = CODE_FOR_nothing; + sync_old_and_optab[i] = CODE_FOR_nothing; + sync_old_xor_optab[i] = CODE_FOR_nothing; + sync_old_nand_optab[i] = CODE_FOR_nothing; + sync_new_add_optab[i] = CODE_FOR_nothing; + sync_new_sub_optab[i] = CODE_FOR_nothing; + sync_new_ior_optab[i] = CODE_FOR_nothing; + sync_new_and_optab[i] = CODE_FOR_nothing; + sync_new_xor_optab[i] = CODE_FOR_nothing; + sync_new_nand_optab[i] = CODE_FOR_nothing; + sync_compare_and_swap[i] = CODE_FOR_nothing; + sync_compare_and_swap_cc[i] = CODE_FOR_nothing; + sync_lock_test_and_set[i] = CODE_FOR_nothing; + sync_lock_release[i] = CODE_FOR_nothing; + #ifdef HAVE_SECONDARY_RELOADS reload_in_optab[i] = reload_out_optab[i] = CODE_FOR_nothing; #endif @@ -5486,4 +5509,517 @@ expand_vec_cond_expr (tree vec_cond_expr, rtx target) return target; } + + +/* This is an internal subroutine of the other compare_and_swap expanders. + MEM, OLD_VAL and NEW_VAL are as you'd expect for a compare-and-swap + operation. TARGET is an optional place to store the value result of + the operation. ICODE is the particular instruction to expand. Return + the result of the operation. */ + +static rtx +expand_val_compare_and_swap_1 (rtx mem, rtx old_val, rtx new_val, + rtx target, enum insn_code icode) +{ + enum machine_mode mode = GET_MODE (mem); + rtx insn; + + if (!target || !insn_data[icode].operand[0].predicate (target, mode)) + target = gen_reg_rtx (mode); + + if (GET_MODE (old_val) != VOIDmode && GET_MODE (old_val) != mode) + old_val = convert_modes (mode, GET_MODE (old_val), old_val, 1); + if (!insn_data[icode].operand[2].predicate (old_val, mode)) + old_val = force_reg (mode, old_val); + + if (GET_MODE (new_val) != VOIDmode && GET_MODE (new_val) != mode) + new_val = convert_modes (mode, GET_MODE (new_val), new_val, 1); + if (!insn_data[icode].operand[3].predicate (new_val, mode)) + new_val = force_reg (mode, new_val); + + insn = GEN_FCN (icode) (target, mem, old_val, new_val); + if (insn == NULL_RTX) + return NULL_RTX; + emit_insn (insn); + + return target; +} + +/* Expand a compare-and-swap operation and return its value. 
+
+/* Expand a compare-and-swap operation and return its value.  */
+
+rtx
+expand_val_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code icode = sync_compare_and_swap[mode];
+
+  if (icode == CODE_FOR_nothing)
+    return NULL_RTX;
+
+  return expand_val_compare_and_swap_1 (mem, old_val, new_val, target, icode);
+}
+
+/* Expand a compare-and-swap operation and store true into the result if
+   the operation was successful and false otherwise.  Return the result.
+   Unlike other routines, TARGET is not optional.  */
+
+rtx
+expand_bool_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code icode;
+  rtx subtarget, label0, label1;
+
+  /* If the target supports a compare-and-swap pattern that simultaneously
+     sets some flag for success, then use it.  Otherwise use the regular
+     compare-and-swap and follow that immediately with a compare insn.  */
+  icode = sync_compare_and_swap_cc[mode];
+  switch (icode)
+    {
+    default:
+      subtarget = expand_val_compare_and_swap_1 (mem, old_val, new_val,
+                                                 NULL_RTX, icode);
+      if (subtarget != NULL_RTX)
+        break;
+
+      /* FALLTHRU */
+    case CODE_FOR_nothing:
+      icode = sync_compare_and_swap[mode];
+      if (icode == CODE_FOR_nothing)
+        return NULL_RTX;
+
+      subtarget = expand_val_compare_and_swap_1 (mem, old_val, new_val,
+                                                 NULL_RTX, icode);
+      if (subtarget == NULL_RTX)
+        return NULL_RTX;
+
+      emit_cmp_insn (subtarget, new_val, EQ, const0_rtx, mode, true);
+    }
+
+  /* If the target has a sane STORE_FLAG_VALUE, then go ahead and use a
+     setcc instruction from the beginning.  We don't work too hard here,
+     but it's nice to not be stupid about initial code gen either.  */
+  if (STORE_FLAG_VALUE == 1)
+    {
+      icode = setcc_gen_code[EQ];
+      if (icode != CODE_FOR_nothing)
+        {
+          enum machine_mode cmode = insn_data[icode].operand[0].mode;
+          rtx insn;
+
+          subtarget = target;
+          if (!insn_data[icode].operand[0].predicate (target, cmode))
+            subtarget = gen_reg_rtx (cmode);
+
+          insn = GEN_FCN (icode) (subtarget);
+          if (insn)
+            {
+              emit_insn (insn);
+              if (GET_MODE (target) != GET_MODE (subtarget))
+                {
+                  convert_move (target, subtarget, 1);
+                  subtarget = target;
+                }
+              return subtarget;
+            }
+        }
+    }
+
+  /* Without an appropriate setcc instruction, use a set of branches to
+     get 1 and 0 stored into target.  Presumably if the target has a
+     STORE_FLAG_VALUE that isn't 1, then this will get cleaned up by ifcvt.  */
+
+  label0 = gen_label_rtx ();
+  label1 = gen_label_rtx ();
+
+  emit_jump_insn (bcc_gen_fctn[EQ] (label0));
+  emit_move_insn (target, const0_rtx);
+  emit_jump_insn (gen_jump (label1));
+  emit_label (label0);
+  emit_move_insn (target, const1_rtx);
+  emit_label (label1);
+
+  return target;
+}
+
+/* This is a helper function for the other atomic operations.  This function
+   emits a loop that contains SEQ that iterates until a compare-and-swap
+   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
+   a set of instructions that takes a value from OLD_REG as an input and
+   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
+   set to the current contents of MEM.  After SEQ, a compare-and-swap will
+   attempt to update MEM with NEW_REG.  The function returns true when the
+   loop was generated successfully.  */
+
+static bool
+expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code icode;
+  rtx label, subtarget;
+
+  /* The loop we want to generate looks like
+
+        old_reg = mem;
+      label:
+        seq;
+        old_reg = compare-and-swap(mem, old_reg, new_reg)
+        if (old_reg != new_reg)
+          goto label;
+
+     Note that we only do the plain load from memory once.  Subsequent
+     iterations use the value loaded by the compare-and-swap pattern.  */
+
+  label = gen_label_rtx ();
+
+  emit_move_insn (old_reg, mem);
+  emit_label (label);
+  if (seq)
+    emit_insn (seq);
+
+  /* If the target supports a compare-and-swap pattern that simultaneously
+     sets some flag for success, then use it.  Otherwise use the regular
+     compare-and-swap and follow that immediately with a compare insn.  */
+  icode = sync_compare_and_swap_cc[mode];
+  switch (icode)
+    {
+    default:
+      subtarget = expand_val_compare_and_swap_1 (mem, old_reg, new_reg,
+                                                 old_reg, icode);
+      if (subtarget != NULL_RTX)
+        break;
+
+      /* FALLTHRU */
+    case CODE_FOR_nothing:
+      icode = sync_compare_and_swap[mode];
+      if (icode == CODE_FOR_nothing)
+        return false;
+
+      subtarget = expand_val_compare_and_swap_1 (mem, old_reg, new_reg,
+                                                 old_reg, icode);
+      if (subtarget == NULL_RTX)
+        return false;
+
+      emit_cmp_insn (subtarget, new_reg, EQ, const0_rtx, mode, true);
+    }
+
+  /* ??? Mark this jump predicted not taken?  */
+  emit_jump_insn (bcc_gen_fctn[NE] (label));
+
+  return true;
+}
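For reference, the loop this helper emits corresponds to the conventional
source-level compare-and-swap retry idiom.  A hedged C sketch, not part of
the patch, of a fetch-and-add lowered this way (the function name is
hypothetical, and the sketch uses the conventional returned-value-versus-
expected success test):

int
cas_loop_fetch_and_add (int *mem, int val)
{
  int old = *mem;  /* plain load, done only once */
  for (;;)
    {
      int new_val = old + val;  /* the SEQ computation */
      int prev = __sync_val_compare_and_swap (mem, old, new_val);
      if (prev == old)
        return old;  /* the swap happened */
      old = prev;    /* retry with the freshly observed value */
    }
}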
+
+/* This function generates the atomic operation MEM CODE= VAL.  In this
+   case, we do not care about any resulting value.  Returns NULL if we
+   cannot generate the operation.  */
+
+rtx
+expand_sync_operation (rtx mem, rtx val, enum rtx_code code)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code icode;
+  rtx insn;
+
+  /* Look to see if the target supports the operation directly.  */
+  switch (code)
+    {
+    case PLUS:
+      icode = sync_add_optab[mode];
+      break;
+    case IOR:
+      icode = sync_ior_optab[mode];
+      break;
+    case XOR:
+      icode = sync_xor_optab[mode];
+      break;
+    case AND:
+      icode = sync_and_optab[mode];
+      break;
+
+    case MINUS:
+      icode = sync_sub_optab[mode];
+      if (icode == CODE_FOR_nothing)
+        {
+          icode = sync_add_optab[mode];
+          if (icode != CODE_FOR_nothing)
+            {
+              val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1);
+              code = PLUS;
+            }
+        }
+      break;
+
+    case NOT:
+      icode = sync_nand_optab[mode];
+      if (icode == CODE_FOR_nothing)
+        {
+          icode = sync_and_optab[mode];
+          if (icode != CODE_FOR_nothing)
+            {
+              val = expand_simple_unop (mode, NOT, val, NULL_RTX, 1);
+              code = AND;
+            }
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Generate the direct operation, if present.  */
+  if (icode != CODE_FOR_nothing)
+    {
+      if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
+        val = convert_modes (mode, GET_MODE (val), val, 1);
+      if (!insn_data[icode].operand[1].predicate (val, mode))
+        val = force_reg (mode, val);
+
+      insn = GEN_FCN (icode) (mem, val);
+      if (insn)
+        {
+          emit_insn (insn);
+          return const0_rtx;
+        }
+    }
+
+  /* Failing that, generate a compare-and-swap loop in which we perform the
+     operation with normal arithmetic instructions.  */
+  if (sync_compare_and_swap[mode] != CODE_FOR_nothing)
+    {
+      rtx t0 = gen_reg_rtx (mode), t1;
+
+      start_sequence ();
+
+      if (code == NOT)
+        {
+          val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
+          code = AND;
+        }
+      t1 = expand_simple_binop (mode, code, t0, val, NULL_RTX,
+                                true, OPTAB_LIB_WIDEN);
+
+      insn = get_insns ();
+      end_sequence ();
+
+      if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
+        return const0_rtx;
+    }
+
+  return NULL_RTX;
+}
+
+/* This function generates the atomic operation MEM CODE= VAL.  In this
+   case, we do care about the resulting value: if AFTER is true then
+   return the value MEM holds after the operation, if AFTER is false
+   then return the value MEM holds before the operation.  TARGET is an
+   optional place for the result value to be stored.  */
+
+rtx
+expand_sync_fetch_operation (rtx mem, rtx val, enum rtx_code code,
+                             bool after, rtx target)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code old_code, new_code, icode;
+  bool compensate;
+  rtx insn;
+
+  /* Look to see if the target supports the operation directly.  */
+  switch (code)
+    {
+    case PLUS:
+      old_code = sync_old_add_optab[mode];
+      new_code = sync_new_add_optab[mode];
+      break;
+    case IOR:
+      old_code = sync_old_ior_optab[mode];
+      new_code = sync_new_ior_optab[mode];
+      break;
+    case XOR:
+      old_code = sync_old_xor_optab[mode];
+      new_code = sync_new_xor_optab[mode];
+      break;
+    case AND:
+      old_code = sync_old_and_optab[mode];
+      new_code = sync_new_and_optab[mode];
+      break;
+
+    case MINUS:
+      old_code = sync_old_sub_optab[mode];
+      new_code = sync_new_sub_optab[mode];
+      if (old_code == CODE_FOR_nothing && new_code == CODE_FOR_nothing)
+        {
+          old_code = sync_old_add_optab[mode];
+          new_code = sync_new_add_optab[mode];
+          if (old_code != CODE_FOR_nothing || new_code != CODE_FOR_nothing)
+            {
+              val = expand_simple_unop (mode, NEG, val, NULL_RTX, 1);
+              code = PLUS;
+            }
+        }
+      break;
+
+    case NOT:
+      old_code = sync_old_nand_optab[mode];
+      new_code = sync_new_nand_optab[mode];
+      if (old_code == CODE_FOR_nothing && new_code == CODE_FOR_nothing)
+        {
+          old_code = sync_old_and_optab[mode];
+          new_code = sync_new_and_optab[mode];
+          if (old_code != CODE_FOR_nothing || new_code != CODE_FOR_nothing)
+            {
+              val = expand_simple_unop (mode, NOT, val, NULL_RTX, 1);
+              code = AND;
+            }
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* If the target does support the proper new/old operation, great.  But
+     if we only support the opposite old/new operation, check to see if we
+     can compensate.  In the case in which the old value is supported, then
+     we can always perform the operation again with normal arithmetic.  In
+     the case in which the new value is supported, then we can only handle
+     this in the case the operation is reversible.  */
+  compensate = false;
+  if (after)
+    {
+      icode = new_code;
+      if (icode == CODE_FOR_nothing)
+        {
+          icode = old_code;
+          if (icode != CODE_FOR_nothing)
+            compensate = true;
+        }
+    }
+  else
+    {
+      icode = old_code;
+      if (icode == CODE_FOR_nothing
+          && (code == PLUS || code == MINUS || code == XOR))
+        {
+          icode = new_code;
+          if (icode != CODE_FOR_nothing)
+            compensate = true;
+        }
+    }
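The compensation idea can be pictured at the source level.  A hedged C
sketch, not part of the patch and with a hypothetical function name: when
only the new-value form of a reversible operation is available, the old
value is recovered by undoing the operation on the result.

int
fetch_then_add_via_new_value (int *mem, int val)
{
  /* Assume only __sync_add_and_fetch (the "new value" form) maps to a
     target instruction.  */
  int new_val = __sync_add_and_fetch (mem, val);
  return new_val - val;  /* undo the addition to recover the old value */
}

This mirrors the code below: when compensate is set and AFTER is false,
PLUS and MINUS are swapped before the operation is re-applied to the result.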
+
+  /* If we found something supported, great.  */
+  if (icode != CODE_FOR_nothing)
+    {
+      if (!target || !insn_data[icode].operand[0].predicate (target, mode))
+        target = gen_reg_rtx (mode);
+
+      if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
+        val = convert_modes (mode, GET_MODE (val), val, 1);
+      if (!insn_data[icode].operand[2].predicate (val, mode))
+        val = force_reg (mode, val);
+
+      insn = GEN_FCN (icode) (target, mem, val);
+      if (insn)
+        {
+          emit_insn (insn);
+
+          /* If we need to compensate for using an operation with the
+             wrong return value, do so now.  */
+          if (compensate)
+            {
+              if (!after)
+                {
+                  if (code == PLUS)
+                    code = MINUS;
+                  else if (code == MINUS)
+                    code = PLUS;
+                }
+              target = expand_simple_binop (mode, code, target, val, NULL_RTX,
+                                            true, OPTAB_LIB_WIDEN);
+            }
+
+          return target;
+        }
+    }
+
+  /* Failing that, generate a compare-and-swap loop in which we perform the
+     operation with normal arithmetic instructions.  */
+  if (sync_compare_and_swap[mode] != CODE_FOR_nothing)
+    {
+      rtx t0 = gen_reg_rtx (mode), t1;
+
+      if (!target || !register_operand (target, mode))
+        target = gen_reg_rtx (mode);
+
+      start_sequence ();
+
+      if (code == NOT)
+        {
+          val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
+          code = AND;
+        }
+      if (!after)
+        emit_move_insn (target, t0);
+      t1 = expand_simple_binop (mode, code, t0, val, NULL_RTX,
+                                true, OPTAB_LIB_WIDEN);
+      if (after)
+        emit_move_insn (target, t1);
+
+      insn = get_insns ();
+      end_sequence ();
+
+      if (t1 != NULL && expand_compare_and_swap_loop (mem, t0, t1, insn))
+        return target;
+    }
+
+  return NULL_RTX;
+}
+
+/* This function expands a test-and-set operation.  Ideally we atomically
+   store VAL in MEM and return the previous value in MEM.  Some targets
+   may not support this operation and only support VAL with the constant 1;
+   in this case the return value will be 0/1, but the exact value stored
+   in MEM is target defined.  TARGET is an optional place to stick the
+   return value.  */
+
+rtx
+expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code icode;
+  rtx insn;
+
+  /* If the target supports the test-and-set directly, great.  */
+  icode = sync_lock_test_and_set[mode];
+  if (icode != CODE_FOR_nothing)
+    {
+      if (!target || !insn_data[icode].operand[0].predicate (target, mode))
+        target = gen_reg_rtx (mode);
+
+      if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
+        val = convert_modes (mode, GET_MODE (val), val, 1);
+      if (!insn_data[icode].operand[2].predicate (val, mode))
+        val = force_reg (mode, val);
+
+      insn = GEN_FCN (icode) (target, mem, val);
+      if (insn)
+        {
+          emit_insn (insn);
+          return target;
+        }
+    }
+
+  /* Otherwise, use a compare-and-swap loop for the exchange.  */
+  if (sync_compare_and_swap[mode] != CODE_FOR_nothing)
+    {
+      if (!target || !register_operand (target, mode))
+        target = gen_reg_rtx (mode);
+      if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
+        val = convert_modes (mode, GET_MODE (val), val, 1);
+      if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
+        return target;
+    }
+
+  return NULL_RTX;
+}
+
 #include "gt-optabs.h"
diff --git a/gcc/optabs.h b/gcc/optabs.h
index dea332e47569..1426e570fb96 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -432,6 +432,43 @@ extern enum insn_code clrmem_optab[NUM_MACHINE_MODES];
 extern enum insn_code cmpstr_optab[NUM_MACHINE_MODES];
 extern enum insn_code cmpmem_optab[NUM_MACHINE_MODES];
 
+/* Synchronization primitives.  This first set is atomic operations for
+   which we don't care about the resulting value.  */
+extern enum insn_code sync_add_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_sub_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_ior_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_and_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_xor_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_nand_optab[NUM_MACHINE_MODES];
+
+/* This second set is atomic operations in which we return the value
+   that existed in memory before the operation.  */
+extern enum insn_code sync_old_add_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_old_sub_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_old_ior_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_old_and_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_old_xor_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_old_nand_optab[NUM_MACHINE_MODES];
+
+/* This third set is atomic operations in which we return the value
+   that resulted after performing the operation.  */
+extern enum insn_code sync_new_add_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_new_sub_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_new_ior_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_new_and_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_new_xor_optab[NUM_MACHINE_MODES];
+extern enum insn_code sync_new_nand_optab[NUM_MACHINE_MODES];
+
+/* Atomic compare and swap.  */
+extern enum insn_code sync_compare_and_swap[NUM_MACHINE_MODES];
+extern enum insn_code sync_compare_and_swap_cc[NUM_MACHINE_MODES];
+
+/* Atomic exchange with acquire semantics.  */
+extern enum insn_code sync_lock_test_and_set[NUM_MACHINE_MODES];
+
+/* Atomic clear with release semantics.  */
+extern enum insn_code sync_lock_release[NUM_MACHINE_MODES];
+
 /* Define functions given in optabs.c.  */
 
 extern rtx expand_ternary_op (enum machine_mode mode, optab ternary_optab,
diff --git a/gcc/testsuite/gcc.c-torture/compile/sync-1.c b/gcc/testsuite/gcc.c-torture/compile/sync-1.c
new file mode 100644
index 000000000000..1703aaf267f2
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/sync-1.c
@@ -0,0 +1,276 @@
+/* Validate that each of the __sync builtins compiles.  This won't
+   necessarily link, since the target might not support the builtin,
+   so this may result in external library calls.  */
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long sl;
+unsigned long ul;
+signed long long sll;
+unsigned long long ull;
+void *vp;
+int *ip;
+struct S { struct S *next; int x; } *sp;
+
+void test_op_ignore (void)
+{
+  (void) __sync_fetch_and_add (&sc, 1);
+  (void) __sync_fetch_and_add (&uc, 1);
+  (void) __sync_fetch_and_add (&ss, 1);
+  (void) __sync_fetch_and_add (&us, 1);
+  (void) __sync_fetch_and_add (&si, 1);
+  (void) __sync_fetch_and_add (&ui, 1);
+  (void) __sync_fetch_and_add (&sl, 1);
+  (void) __sync_fetch_and_add (&ul, 1);
+  (void) __sync_fetch_and_add (&sll, 1);
+  (void) __sync_fetch_and_add (&ull, 1);
+
+  (void) __sync_fetch_and_sub (&sc, 1);
+  (void) __sync_fetch_and_sub (&uc, 1);
+  (void) __sync_fetch_and_sub (&ss, 1);
+  (void) __sync_fetch_and_sub (&us, 1);
+  (void) __sync_fetch_and_sub (&si, 1);
+  (void) __sync_fetch_and_sub (&ui, 1);
+  (void) __sync_fetch_and_sub (&sl, 1);
+  (void) __sync_fetch_and_sub (&ul, 1);
+  (void) __sync_fetch_and_sub (&sll, 1);
+  (void) __sync_fetch_and_sub (&ull, 1);
+
+  (void) __sync_fetch_and_or (&sc, 1);
+  (void) __sync_fetch_and_or (&uc, 1);
+  (void) __sync_fetch_and_or (&ss, 1);
+  (void) __sync_fetch_and_or (&us, 1);
+  (void) __sync_fetch_and_or (&si, 1);
+  (void) __sync_fetch_and_or (&ui, 1);
+  (void) __sync_fetch_and_or (&sl, 1);
+  (void) __sync_fetch_and_or (&ul, 1);
+  (void) __sync_fetch_and_or (&sll, 1);
+  (void) __sync_fetch_and_or (&ull, 1);
+
+  (void) __sync_fetch_and_xor (&sc, 1);
+  (void) __sync_fetch_and_xor (&uc, 1);
+  (void) __sync_fetch_and_xor (&ss, 1);
+  (void) __sync_fetch_and_xor (&us, 1);
+  (void) __sync_fetch_and_xor (&si, 1);
+  (void) __sync_fetch_and_xor (&ui, 1);
+  (void) __sync_fetch_and_xor (&sl, 1);
+  (void) __sync_fetch_and_xor (&ul, 1);
+  (void) __sync_fetch_and_xor (&sll, 1);
+  (void) __sync_fetch_and_xor (&ull, 1);
+
+  (void) __sync_fetch_and_and (&sc, 1);
+  (void) __sync_fetch_and_and (&uc, 1);
+  (void) __sync_fetch_and_and (&ss, 1);
+  (void) __sync_fetch_and_and (&us, 1);
+  (void) __sync_fetch_and_and (&si, 1);
+  (void) __sync_fetch_and_and (&ui, 1);
+  (void) __sync_fetch_and_and (&sl, 1);
+  (void) __sync_fetch_and_and (&ul, 1);
+  (void) __sync_fetch_and_and (&sll, 1);
+  (void) __sync_fetch_and_and (&ull, 1);
+
+  (void) __sync_fetch_and_nand (&sc, 1);
+  (void) __sync_fetch_and_nand (&uc, 1);
+  (void) __sync_fetch_and_nand (&ss, 1);
+  (void) __sync_fetch_and_nand (&us, 1);
+  (void) __sync_fetch_and_nand (&si, 1);
+  (void) __sync_fetch_and_nand (&ui, 1);
+  (void) __sync_fetch_and_nand (&sl, 1);
+  (void) __sync_fetch_and_nand (&ul, 1);
+  (void) __sync_fetch_and_nand (&sll, 1);
+  (void) __sync_fetch_and_nand (&ull, 1);
+}
+
+void test_fetch_and_op (void)
+{
+  sc = __sync_fetch_and_add (&sc, 11);
+  uc = __sync_fetch_and_add (&uc, 11);
+  ss = __sync_fetch_and_add (&ss, 11);
+  us = __sync_fetch_and_add (&us, 11);
+  si = __sync_fetch_and_add (&si, 11);
+  ui = __sync_fetch_and_add (&ui, 11);
+  sl = __sync_fetch_and_add (&sl, 11);
+  ul = __sync_fetch_and_add (&ul, 11);
+  sll = __sync_fetch_and_add (&sll, 11);
+  ull = __sync_fetch_and_add (&ull, 11);
+
+  sc = __sync_fetch_and_sub (&sc, 11);
+  uc = __sync_fetch_and_sub (&uc, 11);
+  ss = __sync_fetch_and_sub (&ss, 11);
+  us = __sync_fetch_and_sub (&us, 11);
+  si = __sync_fetch_and_sub (&si, 11);
+  ui = __sync_fetch_and_sub (&ui, 11);
+  sl = __sync_fetch_and_sub (&sl, 11);
+  ul = __sync_fetch_and_sub (&ul, 11);
+  sll = __sync_fetch_and_sub (&sll, 11);
+  ull = __sync_fetch_and_sub (&ull, 11);
+
+  sc = __sync_fetch_and_or (&sc, 11);
+  uc = __sync_fetch_and_or (&uc, 11);
+  ss = __sync_fetch_and_or (&ss, 11);
+  us = __sync_fetch_and_or (&us, 11);
+  si = __sync_fetch_and_or (&si, 11);
+  ui = __sync_fetch_and_or (&ui, 11);
+  sl = __sync_fetch_and_or (&sl, 11);
+  ul = __sync_fetch_and_or (&ul, 11);
+  sll = __sync_fetch_and_or (&sll, 11);
+  ull = __sync_fetch_and_or (&ull, 11);
+
+  sc = __sync_fetch_and_xor (&sc, 11);
+  uc = __sync_fetch_and_xor (&uc, 11);
+  ss = __sync_fetch_and_xor (&ss, 11);
+  us = __sync_fetch_and_xor (&us, 11);
+  si = __sync_fetch_and_xor (&si, 11);
+  ui = __sync_fetch_and_xor (&ui, 11);
+  sl = __sync_fetch_and_xor (&sl, 11);
+  ul = __sync_fetch_and_xor (&ul, 11);
+  sll = __sync_fetch_and_xor (&sll, 11);
+  ull = __sync_fetch_and_xor (&ull, 11);
+
+  sc = __sync_fetch_and_and (&sc, 11);
+  uc = __sync_fetch_and_and (&uc, 11);
+  ss = __sync_fetch_and_and (&ss, 11);
+  us = __sync_fetch_and_and (&us, 11);
+  si = __sync_fetch_and_and (&si, 11);
+  ui = __sync_fetch_and_and (&ui, 11);
+  sl = __sync_fetch_and_and (&sl, 11);
+  ul = __sync_fetch_and_and (&ul, 11);
+  sll = __sync_fetch_and_and (&sll, 11);
+  ull = __sync_fetch_and_and (&ull, 11);
+
+  sc = __sync_fetch_and_nand (&sc, 11);
+  uc = __sync_fetch_and_nand (&uc, 11);
+  ss = __sync_fetch_and_nand (&ss, 11);
+  us = __sync_fetch_and_nand (&us, 11);
+  si = __sync_fetch_and_nand (&si, 11);
+  ui = __sync_fetch_and_nand (&ui, 11);
+  sl = __sync_fetch_and_nand (&sl, 11);
+  ul = __sync_fetch_and_nand (&ul, 11);
+  sll = __sync_fetch_and_nand (&sll, 11);
+  ull = __sync_fetch_and_nand (&ull, 11);
+}
+
+void test_op_and_fetch (void)
+{
+  sc = __sync_add_and_fetch (&sc, uc);
+  uc = __sync_add_and_fetch (&uc, uc);
+  ss = __sync_add_and_fetch (&ss, uc);
+  us = __sync_add_and_fetch (&us, uc);
+  si = __sync_add_and_fetch (&si, uc);
+  ui = __sync_add_and_fetch (&ui, uc);
+  sl = __sync_add_and_fetch (&sl, uc);
+  ul = __sync_add_and_fetch (&ul, uc);
+  sll = __sync_add_and_fetch (&sll, uc);
+  ull = __sync_add_and_fetch (&ull, uc);
+
+  sc = __sync_sub_and_fetch (&sc, uc);
+  uc = __sync_sub_and_fetch (&uc, uc);
+  ss = __sync_sub_and_fetch (&ss, uc);
+  us = __sync_sub_and_fetch (&us, uc);
+  si = __sync_sub_and_fetch (&si, uc);
+  ui = __sync_sub_and_fetch (&ui, uc);
+  sl = __sync_sub_and_fetch (&sl, uc);
+  ul = __sync_sub_and_fetch (&ul, uc);
+  sll = __sync_sub_and_fetch (&sll, uc);
+  ull = __sync_sub_and_fetch (&ull, uc);
+
+  sc = __sync_or_and_fetch (&sc, uc);
+  uc = __sync_or_and_fetch (&uc, uc);
+  ss = __sync_or_and_fetch (&ss, uc);
+  us = __sync_or_and_fetch (&us, uc);
+  si = __sync_or_and_fetch (&si, uc);
+  ui = __sync_or_and_fetch (&ui, uc);
+  sl = __sync_or_and_fetch (&sl, uc);
+  ul = __sync_or_and_fetch (&ul, uc);
+  sll = __sync_or_and_fetch (&sll, uc);
+  ull = __sync_or_and_fetch (&ull, uc);
+
+  sc = __sync_xor_and_fetch (&sc, uc);
+  uc = __sync_xor_and_fetch (&uc, uc);
+  ss = __sync_xor_and_fetch (&ss, uc);
+  us = __sync_xor_and_fetch (&us, uc);
+  si = __sync_xor_and_fetch (&si, uc);
+  ui = __sync_xor_and_fetch (&ui, uc);
+  sl = __sync_xor_and_fetch (&sl, uc);
+  ul = __sync_xor_and_fetch (&ul, uc);
+  sll = __sync_xor_and_fetch (&sll, uc);
+  ull = __sync_xor_and_fetch (&ull, uc);
+
+  sc = __sync_and_and_fetch (&sc, uc);
+  uc = __sync_and_and_fetch (&uc, uc);
+  ss = __sync_and_and_fetch (&ss, uc);
+  us = __sync_and_and_fetch (&us, uc);
+  si = __sync_and_and_fetch (&si, uc);
+  ui = __sync_and_and_fetch (&ui, uc);
+  sl = __sync_and_and_fetch (&sl, uc);
+  ul = __sync_and_and_fetch (&ul, uc);
+  sll = __sync_and_and_fetch (&sll, uc);
+  ull = __sync_and_and_fetch (&ull, uc);
+
+  sc = __sync_nand_and_fetch (&sc, uc);
+  uc = __sync_nand_and_fetch (&uc, uc);
+  ss = __sync_nand_and_fetch (&ss, uc);
+  us = __sync_nand_and_fetch (&us, uc);
+  si = __sync_nand_and_fetch (&si, uc);
+  ui = __sync_nand_and_fetch (&ui, uc);
+  sl = __sync_nand_and_fetch (&sl, uc);
+  ul = __sync_nand_and_fetch (&ul, uc);
+  sll = __sync_nand_and_fetch (&sll, uc);
+  ull = __sync_nand_and_fetch (&ull, uc);
+}
+
+void test_compare_and_swap (void)
+{
+  sc = __sync_val_compare_and_swap (&sc, uc, sc);
+  uc = __sync_val_compare_and_swap (&uc, uc, sc);
+  ss = __sync_val_compare_and_swap (&ss, uc, sc);
+  us = __sync_val_compare_and_swap (&us, uc, sc);
+  si = __sync_val_compare_and_swap (&si, uc, sc);
+  ui = __sync_val_compare_and_swap (&ui, uc, sc);
+  sl = __sync_val_compare_and_swap (&sl, uc, sc);
+  ul = __sync_val_compare_and_swap (&ul, uc, sc);
+  sll = __sync_val_compare_and_swap (&sll, uc, sc);
+  ull = __sync_val_compare_and_swap (&ull, uc, sc);
+
+  ui = __sync_bool_compare_and_swap (&sc, uc, sc);
+  ui = __sync_bool_compare_and_swap (&uc, uc, sc);
+  ui = __sync_bool_compare_and_swap (&ss, uc, sc);
+  ui = __sync_bool_compare_and_swap (&us, uc, sc);
+  ui = __sync_bool_compare_and_swap (&si, uc, sc);
+  ui = __sync_bool_compare_and_swap (&ui, uc, sc);
+  ui = __sync_bool_compare_and_swap (&sl, uc, sc);
+  ui = __sync_bool_compare_and_swap (&ul, uc, sc);
+  ui = __sync_bool_compare_and_swap (&sll, uc, sc);
+  ui = __sync_bool_compare_and_swap (&ull, uc, sc);
+}
+
+void test_lock (void)
+{
+  sc = __sync_lock_test_and_set (&sc, 1);
+  uc = __sync_lock_test_and_set (&uc, 1);
+  ss = __sync_lock_test_and_set (&ss, 1);
+  us = __sync_lock_test_and_set (&us, 1);
+  si = __sync_lock_test_and_set (&si, 1);
+  ui = __sync_lock_test_and_set (&ui, 1);
+  sl = __sync_lock_test_and_set (&sl, 1);
+  ul = __sync_lock_test_and_set (&ul, 1);
+  sll = __sync_lock_test_and_set (&sll, 1);
+  ull = __sync_lock_test_and_set (&ull, 1);
+
+  __sync_synchronize ();
+
+  __sync_lock_release (&sc);
+  __sync_lock_release (&uc);
+  __sync_lock_release (&ss);
+  __sync_lock_release (&us);
+  __sync_lock_release (&si);
+  __sync_lock_release (&ui);
+  __sync_lock_release (&sl);
+  __sync_lock_release (&ul);
+  __sync_lock_release (&sll);
+  __sync_lock_release (&ull);
+}
diff --git a/gcc/testsuite/gcc.dg/sync-1.c b/gcc/testsuite/gcc.dg/sync-1.c
new file mode 100644
index 000000000000..f8cabe47a990
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sync-1.c
@@ -0,0 +1,40 @@
+/* Validate that the __sync builtins are overloaded properly.  */
+/* { dg-do compile } */
+/* { dg-options "-Werror" } */
+
+#define TEST1(TYPE, BUILTIN) \
+void t_##TYPE##BUILTIN(TYPE *p) \
+{ \
+  __typeof(BUILTIN(p, 1)) *pp; \
+  pp = p; \
+}
+
+#define TEST2(BUILTIN) \
+  TEST1(int, BUILTIN) \
+  TEST1(long, BUILTIN)
+
+TEST2(__sync_fetch_and_add)
+TEST2(__sync_fetch_and_sub)
+TEST2(__sync_fetch_and_or)
+TEST2(__sync_fetch_and_and)
+TEST2(__sync_fetch_and_xor)
+TEST2(__sync_fetch_and_nand)
+
+TEST2(__sync_add_and_fetch)
+TEST2(__sync_sub_and_fetch)
+TEST2(__sync_or_and_fetch)
+TEST2(__sync_and_and_fetch)
+TEST2(__sync_xor_and_fetch)
+TEST2(__sync_nand_and_fetch)
+
+TEST2(__sync_lock_test_and_set)
+
+#define TEST3(TYPE) \
+void t_##TYPE##__sync_val_compare_and_swap(TYPE *p) \
+{ \
+  __typeof(__sync_val_compare_and_swap(p, 1, 2)) *pp; \
+  pp = p; \
+}
+
+TEST3(int)
+TEST3(long)
-- 
2.39.2