From d5f5fa27aabcdfb41c0c3a2b73e92db731c58c4f Mon Sep 17 00:00:00 2001
From: torvald
Date: Wed, 1 Feb 2017 17:21:59 +0000
Subject: [PATCH] Fix __atomic to not implement atomic loads with CAS.

gcc/
	* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
	conditional on existence of a fast atomic load.
	* optabs-query.c (can_atomic_load_p): New function.
	* optabs-query.h (can_atomic_load_p): Declare it.
	* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
	no fast atomic load is available for the particular size of access.
	(expand_atomic_compare_and_swap): Likewise.
	(expand_atomic_load): Likewise.
	(expand_atomic_store): Likewise.
	(expand_atomic_fetch_op): Likewise.
	* testsuite/lib/target-supports.exp
	(check_effective_target_sync_int_128): Remove x86 because it provides
	no fast atomic load.
	(check_effective_target_sync_int_128_runtime): Likewise.

libatomic/
	* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
	* auto-config.h.in: Regenerate.
	* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
	(atomic_compare_exchange_n): New.
	* glfree.c (EXACT, LARGER): Change condition and add comments.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@245098 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                         | 18 ++++++++
 gcc/builtins.c                        |  5 ++-
 gcc/optabs-query.c                    | 19 ++++++++
 gcc/optabs-query.h                    |  1 +
 gcc/optabs.c                          | 63 ++++++++++++++++++---------
 gcc/testsuite/lib/target-supports.exp | 21 ++-------
 libatomic/ChangeLog                   |  9 ++++
 libatomic/acinclude.m4                |  1 +
 libatomic/auto-config.h.in            | 30 ++++++++-----
 libatomic/config/x86/host-config.h    | 18 ++++++++
 libatomic/glfree.c                    | 21 +++++++--
 11 files changed, 152 insertions(+), 54 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bb31a8f4b162..594cc3bf1666 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2017-02-01  Torvald Riegel
+	    Richard Henderson
+
+	* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
+	conditional on existence of a fast atomic load.
+	* optabs-query.c (can_atomic_load_p): New function.
+	* optabs-query.h (can_atomic_load_p): Declare it.
+	* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
+	no fast atomic load is available for the particular size of access.
+	(expand_atomic_compare_and_swap): Likewise.
+	(expand_atomic_load): Likewise.
+	(expand_atomic_store): Likewise.
+	(expand_atomic_fetch_op): Likewise.
+	* testsuite/lib/target-supports.exp
+	(check_effective_target_sync_int_128): Remove x86 because it provides
+	no fast atomic load.
+	(check_effective_target_sync_int_128_runtime): Likewise.
+
 2017-02-01  Richard Biener
 
 	* graphite.c: Include tree-vectorizer.h for find_loop_location.
diff --git a/gcc/builtins.c b/gcc/builtins.c
index bf68e317124f..0a0e8b9e2fa7 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6157,8 +6157,9 @@ fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
 
   /* Check if a compare_and_swap pattern exists for the mode which represents
      the required size.  The pattern is not allowed to fail, so the existence
-     of the pattern indicates support is present.  */
-  if (can_compare_and_swap_p (mode, true))
+     of the pattern indicates support is present.  Also require that an
+     atomic load exists for the required size.  */
+  if (can_compare_and_swap_p (mode, true) && can_atomic_load_p (mode))
     return boolean_true_node;
   else
     return boolean_false_node;
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 6c34a4e9a312..4899333096e5 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -584,6 +584,25 @@ can_atomic_exchange_p (machine_mode mode, bool allow_libcall)
   return can_compare_and_swap_p (mode, allow_libcall);
 }
 
+/* Return true if an atomic load can be performed without falling back to
+   a compare-and-swap.  */
+
+bool
+can_atomic_load_p (machine_mode mode)
+{
+  enum insn_code icode;
+
+  /* Does the target support the load directly?  */
+  icode = direct_optab_handler (atomic_load_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+
+  /* If the size of the object is greater than word size on this target,
+     then we assume that a load will not be atomic.  Also see
+     expand_atomic_load.  */
+  return GET_MODE_PRECISION (mode) <= BITS_PER_WORD;
+}
+
 /* Determine whether "1 << x" is relatively cheap in word_mode.  */
 
 bool
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index a80a0e7b0b69..e85a7f11b959 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -176,6 +176,7 @@ int can_mult_highpart_p (machine_mode, bool);
 bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
 bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
+bool can_atomic_load_p (machine_mode);
 bool lshift_cheap_p (bool);
 
 #endif
diff --git a/gcc/optabs.c b/gcc/optabs.c
index d8831a89f218..1afd593ae152 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6086,8 +6086,15 @@ expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
 rtx
 expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
 {
+  machine_mode mode = GET_MODE (mem);
   rtx ret;
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
   ret = maybe_emit_atomic_exchange (target, mem, val, model);
 
   /* Next try a compare-and-swap loop for the exchange.  */
@@ -6121,6 +6128,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
   rtx target_oval, target_bool = NULL_RTX;
   rtx libfunc;
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (succ_model))
+    return false;
+
   /* Load expected into a register for the compare and swap.  */
   if (MEM_P (expected))
     expected = copy_to_reg (expected);
@@ -6316,19 +6329,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
     }
 
   /* If the size of the object is greater than word size on this target,
-     then we assume that a load will not be atomic.  */
+     then we assume that a load will not be atomic.  We could try to
+     emulate a load with a compare-and-swap operation, but the store such
+     an emulation performs would be incorrect if this is a volatile atomic
+     load or targets read-only-mapped memory.  */
   if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
-    {
-      /* Issue val = compare_and_swap (mem, 0, 0).
-	 This may cause the occasional harmless store of 0 when the value is
-	 already 0, but it seems to be OK according to the standards guys.  */
-      if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
-					  const0_rtx, false, model, model))
-	return target;
-      else
-	/* Otherwise there is no atomic load, leave the library call.  */
-	return NULL_RTX;
-    }
+    /* If there is no atomic load, leave the library call.  */
+    return NULL_RTX;
 
   /* Otherwise assume loads are atomic, and emit the proper barriers.  */
   if (!target || target == const0_rtx)
@@ -6370,7 +6377,9 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
       return const0_rtx;
     }
 
-  /* If using __sync_lock_release is a viable alternative, try it.  */
+  /* If using __sync_lock_release is a viable alternative, try it.
+     Note that use_release will not be set to true if we are expanding a
+     generic __atomic_store_n.  */
   if (use_release)
     {
       icode = direct_optab_handler (sync_lock_release_optab, mode);
@@ -6389,16 +6398,22 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
     }
 
   /* If the size of the object is greater than word size on this target,
-     a default store will not be atomic, Try a mem_exchange and throw away
-     the result.  If that doesn't work, don't do anything.  */
+     a default store will not be atomic.  */
   if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
     {
-      rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
-      if (!target)
-	target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
-      if (target)
-	return const0_rtx;
-      else
+      /* If loads are atomic or we are called to provide a __sync builtin,
+	 we can try an atomic_exchange and throw away the result.  Otherwise,
+	 don't do anything so that we do not create an inconsistency between
+	 loads and stores.  */
+      if (can_atomic_load_p (mode) || is_mm_sync (model))
+	{
+	  rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
+	  if (!target)
+	    target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem,
+								val);
+	  if (target)
+	    return const0_rtx;
+	}
       return NULL_RTX;
     }
 
@@ -6713,6 +6728,12 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
   rtx result;
   bool unused_result = (target == const0_rtx);
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
   result = expand_atomic_fetch_op_no_fallback (target, mem, val, code, model,
					       after);
 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 95a1c500c28a..7a260085405c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6514,9 +6514,7 @@ proc check_effective_target_section_anchors { } {
 # Return 1 if the target supports atomic operations on "int_128" values.
 
 proc check_effective_target_sync_int_128 { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-	  && ![is-effective-target ia32])
-	 || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
	return 1
     } else {
	return 0
@@ -6525,23 +6523,10 @@ proc check_effective_target_sync_int_128 { } {
 
 # Return 1 if the target supports atomic operations on "int_128" values
 # and can execute them.
+# This requires support for both compare-and-swap and true atomic loads.
 
 proc check_effective_target_sync_int_128_runtime { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-	  && ![is-effective-target ia32]
-	  && [check_cached_effective_target sync_int_128_available {
-	      check_runtime_nocache sync_int_128_available {
-		  #include "cpuid.h"
-		  int main ()
-		  {
-		      unsigned int eax, ebx, ecx, edx;
-		      if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
-			return !(ecx & bit_CMPXCHG16B);
-		      return 1;
-		  }
-	      } ""
-	  }])
-	 || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
	return 1
     } else {
	return 0
diff --git a/libatomic/ChangeLog b/libatomic/ChangeLog
index 1b47b8f2235d..d2b83369b74c 100644
--- a/libatomic/ChangeLog
+++ b/libatomic/ChangeLog
@@ -1,3 +1,12 @@
+2017-02-01  Richard Henderson
+	    Torvald Riegel
+
+	* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
+	* auto-config.h.in: Regenerate.
+	* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
+	(atomic_compare_exchange_n): New.
+	* glfree.c (EXACT, LARGER): Change condition and add comments.
+
 2017-01-30  Szabolcs Nagy
 
 	PR target/78945
diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4
index a86e52b4ff4b..485d731df551 100644
--- a/libatomic/acinclude.m4
+++ b/libatomic/acinclude.m4
@@ -96,6 +96,7 @@ AC_DEFUN([LIBAT_HAVE_ATOMIC_LOADSTORE],[
   LIBAT_DEFINE_YESNO([HAVE_ATOMIC_LDST_$2], [$libat_cv_have_at_ldst_$2],
	[Have __atomic_load/store for $2 byte integers.])
   AH_BOTTOM([#define MAYBE_HAVE_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
+  AH_BOTTOM([#define FAST_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
 ])
 
 dnl
diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in
index 83e54e2db3b0..d5b8a26e33e1 100644
--- a/libatomic/auto-config.h.in
+++ b/libatomic/auto-config.h.in
@@ -222,6 +222,16 @@
 #define MAYBE_HAVE_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
 
+#define FAST_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+
+#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+
+#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+
+#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+
+#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+
 #define MAYBE_HAVE_ATOMIC_TAS_16 HAVE_ATOMIC_TAS_16
 
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_1 HAVE_ATOMIC_EXCHANGE_1
@@ -232,6 +242,8 @@
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_8 HAVE_ATOMIC_EXCHANGE_8
 
+#define FAST_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
+
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_16 HAVE_ATOMIC_EXCHANGE_16
 
 #define MAYBE_HAVE_ATOMIC_CAS_1 HAVE_ATOMIC_CAS_1
@@ -242,8 +254,6 @@
 #define MAYBE_HAVE_ATOMIC_CAS_8 HAVE_ATOMIC_CAS_8
 
-#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
-
 #define MAYBE_HAVE_ATOMIC_CAS_16 HAVE_ATOMIC_CAS_16
 
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_1 HAVE_ATOMIC_FETCH_ADD_1
@@ -254,6 +264,8 @@
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_8 HAVE_ATOMIC_FETCH_ADD_8
 
+#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
+
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_16 HAVE_ATOMIC_FETCH_ADD_16
 
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_1 HAVE_ATOMIC_FETCH_OP_1
@@ -264,22 +276,20 @@
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_8 HAVE_ATOMIC_FETCH_OP_8
 
-#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_16 HAVE_ATOMIC_FETCH_OP_16
 
 #ifndef WORDS_BIGENDIAN
 #define WORDS_BIGENDIAN 0
 #endif
 
-#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
+#define FAST_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
 
-#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
 
-#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+#define FAST_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
 
-#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
 
-#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+#define FAST_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
 
-#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
 
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h
index 5754db4fccf2..2e9f85aee5f9 100644
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -47,6 +47,9 @@ extern unsigned int libat_feat1_edx HIDDEN;
 # define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
 # undef MAYBE_HAVE_ATOMIC_LDST_16
 # define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_1
+/* Since load and store are implemented with CAS, they are not fast.  */
+# undef FAST_ATOMIC_LDST_16
+# define FAST_ATOMIC_LDST_16 0
 # if IFUNC_ALT == 1
 #  undef HAVE_ATOMIC_CAS_16
 #  define HAVE_ATOMIC_CAS_16 1
@@ -64,6 +67,21 @@ extern unsigned int libat_feat1_edx HIDDEN;
 # endif
 #endif
 
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+static inline bool
+atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
+			   bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
+{
+  UTYPE cmpval = *eptr;
+  UTYPE oldval = __sync_val_compare_and_swap_16 (mptr, cmpval, newval);
+  if (oldval == cmpval)
+    return true;
+  *eptr = oldval;
+  return false;
+}
+# define atomic_compare_exchange_n atomic_compare_exchange_n
+#endif /* Have CAS 16 */
+
 #endif /* HAVE_IFUNC */
 
 #include_next <host-config.h>
diff --git a/libatomic/glfree.c b/libatomic/glfree.c
index b68dec77db0b..59fe533bc308 100644
--- a/libatomic/glfree.c
+++ b/libatomic/glfree.c
@@ -24,26 +24,41 @@
 
 #include "libatomic_i.h"
 
-
+/* Accesses with a power-of-two size are not lock-free if we don't have an
+   integer type of this size or if they are not naturally aligned.  They
+   are lock-free if such a naturally aligned access is always lock-free
+   according to the compiler, which requires that both atomic loads and CAS
+   are available.
+   In all other cases, we fall through to LARGER (see below).  */
 #define EXACT(N)						\
   do {								\
     if (!C2(HAVE_INT,N)) break;					\
     if ((uintptr_t)ptr & (N - 1)) break;			\
     if (__atomic_always_lock_free(N, 0)) return true;		\
-    if (C2(MAYBE_HAVE_ATOMIC_CAS_,N)) return true;		\
+    if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;			\
+    if (C2(FAST_ATOMIC_LDST_,N)) return true;			\
   } while (0)
 
-
+/* We next check to see if an access of a larger size is lock-free.  We use
+   a check similar to the one in EXACT, except that we also check that the
+   alignment of the access is such that the data to be accessed is completely
+   covered by the larger access.  */
 #define LARGER(N)						\
   do {								\
     uintptr_t r = (uintptr_t)ptr & (N - 1);			\
     if (!C2(HAVE_INT,N)) break;					\
-    if (!C2(HAVE_ATOMIC_LDST_,N)) break;			\
+    if (!C2(FAST_ATOMIC_LDST_,N)) break;			\
     if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;			\
     if (r + n <= N) return true;				\
   } while (0)
 
+/* Note that this can return that a size/alignment is not lock-free even if
+   all the operations that we use to implement the respective accesses provide
+   lock-free forward progress as specified in C++14:  Users likely expect
+   "lock-free" to also mean "fast", which is why we do not return true if, for
+   example, we implement loads with this size/alignment using a CAS.  */
 bool
 libat_is_lock_free (size_t n, void *ptr)
 {
--
2.39.2
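
Background note on the change above (not part of the patch): a compare-and-swap is not a valid substitute for an atomic load. A CAS needs write access to the object and, on x86, the lock-prefixed instruction claims the cache line exclusively even when the stored value equals the old one, so a CAS-based "load" faults on read-only-mapped memory, breaks volatile semantics, and turns concurrent readers into writers. The self-contained C11 sketch below contrasts the two approaches; the helper names real_load and cas_based_load are invented for illustration and do not appear in GCC or libatomic.

#include <stdatomic.h>
#include <stdio.h>

/* A genuine atomic load: never writes to *p, so it is also valid when the
   object lives in a read-only mapping.  */
static unsigned long long
real_load (const _Atomic unsigned long long *p)
{
  return atomic_load_explicit (p, memory_order_seq_cst);
}

/* The kind of emulation the old expand_atomic_load fallback produced: a CAS
   with expected == desired.  It does yield the current value, but only
   because the memory is writable.  */
static unsigned long long
cas_based_load (_Atomic unsigned long long *p)
{
  unsigned long long expected = 0;
  /* If *p == 0, this "succeeds" and stores 0 back into *p; if *p != 0, it
     fails and copies the current value into expected.  Either way we learn
     the value, but only by being allowed to write.  */
  atomic_compare_exchange_strong (p, &expected, 0);
  return expected;
}

int
main (void)
{
  _Atomic unsigned long long x = 42;
  printf ("real_load:      %llu\n", real_load (&x));
  printf ("cas_based_load: %llu\n", cas_based_load (&x));
  return 0;
}

With this patch, GCC applies the same reasoning to sizes that have no true atomic load: for example, 16-byte objects on x86-64 are no longer reported as always lock-free, and their loads and stores are delegated to libatomic rather than emulated with cmpxchg16b.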