AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
+AARCH64_EXTRA_TUNING_OPTION ("avoid_ldapur", AVOID_LDAPUR)
+
/* Enable if the target prefers to use a fresh register for predicate outputs
rather than re-use an input predicate register. */
AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW)
if (TARGET_SVE2)
current_tune.extra_tuning_flags
&= ~AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS;
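+
+ /* By default, avoid LDAPUR unless the target provides Armv8.8-A.  */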
+ if (!AARCH64_HAVE_ISA (V8_8A))
+   current_tune.extra_tuning_flags |= AARCH64_EXTRA_TUNE_AVOID_LDAPUR;
}
static void
/* Make a copy of the tuning parameters attached to the core, which
we may later overwrite. */
aarch64_tune_params = *(tune->tune);
- if (tune->tune == &generic_tunings)
+
+ if (tune->tune == &generic_tunings
+ || tune->tune == &generic_armv8_a_tunings
+ || tune->tune == &generic_armv9_a_tunings)
aarch64_adjust_generic_arch_tuning (aarch64_tune_params);
if (opts->x_aarch64_override_tune_string)
(bool (aarch64_tune_params.extra_tuning_flags \
& AARCH64_EXTRA_TUNE_CHEAP_FPMR_WRITE))
+/* Enable folding address computation into LDAPUR when RCPC2 is available. */
+#define TARGET_ENABLE_LDAPUR (TARGET_RCPC2 \
+ && !(aarch64_tune_params.extra_tuning_flags \
+ & AARCH64_EXTRA_TUNE_AVOID_LDAPUR))
+
/* Combinatorial tests. */
#define TARGET_SVE2_OR_SME2 \
;; clobber for SVE predicates.
(define_attr "pred_clobber" "any,no,yes" (const_string "any"))
+(define_attr "enable_ldapur" "any,no,yes" (const_string "any"))
+
;; [For compatibility with Arm in pipeline models]
;; Attribute that specifies whether or not the instruction touches fp
;; registers.
(eq_attr "pred_clobber" "yes")
(match_test "TARGET_SVE_PRED_CLOBBER"))
(eq_attr "pred_clobber" "any"))
-
+ (ior
+ (and
+ (eq_attr "enable_ldapur" "yes")
+ (match_test "TARGET_ENABLE_LDAPUR"))
+ (and
+ (eq_attr "enable_ldapur" "no")
+ (match_test "!TARGET_ENABLE_LDAPUR"))
+ (eq_attr "enable_ldapur" "any"))
(ior
(eq_attr "arch" "any")
)
(define_insn "aarch64_atomic_load<mode>_rcpc"
- [(set (match_operand:ALLI 0 "register_operand" "=r")
+ [(set (match_operand:ALLI 0 "register_operand")
(unspec_volatile:ALLI
- [(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+ [(match_operand:ALLI 1 "aarch64_rcpc_memory_operand")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_LDAP))]
"TARGET_RCPC"
- "ldapr<atomic_sfx>\t%<w>0, %1"
+ {@ [ cons: =0 , 1 ; attrs: enable_ldapur ]
+ [ r , Q ; any ] ldapr<atomic_sfx>\t%<w>0, %1
+ [ r , Ust ; yes ] ldapur<atomic_sfx>\t%<w>0, %1
+ }
)
(define_insn "aarch64_atomic_load<mode>"
)
(define_insn "*aarch64_atomic_load<ALLX:mode>_rcpc_zext"
- [(set (match_operand:SD_HSDI 0 "register_operand" "=r")
+ [(set (match_operand:SD_HSDI 0 "register_operand")
(zero_extend:SD_HSDI
(unspec_volatile:ALLX
- [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q")
+ [(match_operand:ALLX 1 "aarch64_rcpc_memory_operand")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_LDAP)))]
"TARGET_RCPC && (<SD_HSDI:sizen> > <ALLX:sizen>)"
- "ldapr<ALLX:atomic_sfx>\t%w0, %1"
+ {@ [ cons: =0 , 1 ; attrs: enable_ldapur ]
+ [ r , Q ; any ] ldapr<ALLX:atomic_sfx>\t%w0, %1
+ [ r , Ust ; yes ] ldapur<ALLX:atomic_sfx>\t%w0, %1
+ }
)
(define_insn "*aarch64_atomic_load<ALLX:mode>_rcpc_sext"
[(set (match_operand:GPI 0 "register_operand" "=r")
(sign_extend:GPI
(unspec_volatile:ALLX
- [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q")
+ [(match_operand:ALLX 1 "aarch64_rcpc_memory_operand" "Ust")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_LDAP)))]
"TARGET_RCPC2 && (<GPI:sizen> > <ALLX:sizen>)"
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
/*
**test_s16_s64:
**...
-** ldapursh x0, \[x[0-9]+\]
+** ldapursh x0, \[x[0-9]+, [0-9]+\]
** ret
*/
/*
**test_s32_s64:
**...
-** ldapursw x0, \[x[0-9]+\]
+** ldapursw x0, \[x[0-9]+, [0-9]+\]
** ret
*/
/*
**test_s16_s32:
**...
-** ldapursh w0, \[x[0-9]+\]
+** ldapursh w0, \[x[0-9]+, [0-9]+\]
** ret
*/
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+#pragma GCC target "arch=armv8.8-a"
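+
+/* With RCPC2 (implied by Armv8.8-A), acquire loads from an offset address
+   should be emitted as LDAPUR with the offset folded in.  */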
+
+atomic_ullong u64;
+atomic_uint u32;
+atomic_ushort u16;
+atomic_uchar u8[2]; /* Force an offset for u8.  */
+
+#define TEST(name, ldsize, rettype) \
+rettype \
+test_##name (void) \
+{ \
+ return atomic_load_explicit (&ldsize, memory_order_acquire); \
+} \
+
+
+/*
+**test_u8_u64:
+** ...
+** ldapurb w[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u8_u64, u8[1], uint64_t)
+
+/*
+**test_u16_u64:
+** ...
+** ldapurh w[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u16_u64, u16, uint64_t)
+
+/*
+**test_u32_u64:
+** ...
+** ldapur w[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u32_u64, u32, uint64_t)
+
+/*
+**test_u64_u64:
+** ...
+** ldapur x[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u64_u64, u64, uint64_t)
+
+/*
+**test_u8_u32:
+** ...
+** ldapurb w[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u8_u32, u8[1], uint32_t)
+
+/*
+**test_u16_u32:
+** ...
+** ldapurh w[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u16_u32, u16, uint32_t)
+
+/*
+**test_u32_u32:
+** ...
+** ldapur w[0-9]+, \[x[0-9]+, [0-9]+\]
+** ret
+*/
+TEST(u32_u32, u32, uint32_t)
\ No newline at end of file
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c99 -moverride=tune=avoid_ldapur" } */
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+#pragma GCC target "arch=armv8.8-a"
+/* By default, LDAPUR is only avoided for Armv8.4 through Armv8.7.  This test
+   checks that the avoid_ldapur override still disables it on Armv8.8-A.  */
+
+/* { dg-final { scan-assembler-not "ldapur\t" } } */
+
+atomic_ullong u64;
+atomic_uint u32;
+atomic_ushort u16;
+atomic_uchar u8[2]; /* Force an offset for u8.  */
+
+#define TEST(name, ldsize, rettype) \
+rettype \
+test_##name (void) \
+{ \
+ return atomic_load_explicit (&ldsize, memory_order_acquire); \
+} \
+
+TEST(u8_u64, u8[1], uint64_t)
+TEST(u16_u64, u16, uint64_t)
+TEST(u32_u64, u32, uint64_t)
+TEST(u64_u64, u64, uint64_t)
+TEST(u8_u32, u8[1], uint32_t)
+TEST(u16_u32, u16, uint32_t)
+TEST(u32_u32, u32, uint32_t)
+
+/* { dg-final { scan-assembler-times "ldapr\t" 3 } } */
+/* { dg-final { scan-assembler-times "ldaprh\t" 2 } } */
+/* { dg-final { scan-assembler-times "ldaprb\t" 2 } } */