[(set_attr "type" "crypto_sha3")]
)
-(define_insn "aarch64_xarqv2di"
+(define_insn "*aarch64_xarqv2di_insn"
[(set (match_operand:V2DI 0 "register_operand" "=w")
- (rotatert:V2DI
+ (rotate:V2DI
(xor:V2DI
(match_operand:V2DI 1 "register_operand" "%w")
(match_operand:V2DI 2 "register_operand" "w"))
- (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
+ (match_operand:V2DI 3 "aarch64_simd_lshift_imm" "Dl")))]
"TARGET_SHA3"
- "xar\\t%0.2d, %1.2d, %2.2d, %3"
+ {
+ operands[3]
+ = GEN_INT (64 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
+ return "xar\\t%0.2d, %1.2d, %2.2d, %3";
+ }
[(set_attr "type" "crypto_sha3")]
)
+;; The semantics of the vxarq_u64 intrinsics treat the immediate argument as
+;; a right-rotate amount, but the recommended representation of rotates by a
+;; constant in RTL is with the left ROTATE code.  Translate between the
+;; intrinsic-provided rotate amount and the RTL operands in the expander here.
+;; The define_insn for XAR translates back to instruction semantics in its
+;; output logic.
+(define_expand "aarch64_xarqv2di"
+ [(set (match_operand:V2DI 0 "register_operand")
+ (rotate:V2DI
+ (xor:V2DI
+ (match_operand:V2DI 1 "register_operand")
+ (match_operand:V2DI 2 "register_operand"))
+ (match_operand:SI 3 "aarch64_simd_shift_imm_di")))]
+ "TARGET_SHA3"
+ {
+ operands[3]
+ = aarch64_simd_gen_const_vector_dup (V2DImode,
+ 64 - INTVAL (operands[3]));
+ }
+)
+
(define_insn "bcaxq<mode>4"
[(set (match_operand:VQ_I 0 "register_operand" "=w")
(xor:VQ_I
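(Not part of the patch.)  A minimal scalar sketch of the identity that both
the expander and the XAR insn's output logic above rely on: for 0 < N < 64,
rotating a 64-bit value right by N is the same as rotating it left by
64 - N.  The helper names ror64/rol64 are illustrative only.

#include <assert.h>
#include <stdint.h>

static uint64_t
ror64 (uint64_t x, unsigned n)	/* XAR's native right-rotate.  */
{
  return (x >> n) | (x << (64 - n));
}

static uint64_t
rol64 (uint64_t x, unsigned n)	/* RTL's recommended left ROTATE.  */
{
  return (x << n) | (x >> (64 - n));
}

int
main (void)
{
  uint64_t t = 0x0123456789abcdefULL;
  /* The expander maps a right-rotate by N to a left-rotate by 64 - N;
     the insn's output logic converts back when printing the immediate.  */
  for (unsigned n = 1; n < 64; n++)
    assert (ror64 (t, n) == rol64 (t, 64 - n));
  return 0;
}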
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+#pragma GCC target "+sha3"
+
+/* Rotate each 64-bit lane of X right by B using only shifts and an OR,
+   a sequence GCC should recognize as a rotate.  */
+static inline uint64x2_t
+rotr64_vec(uint64x2_t x, const int b)
+{
+  /* vshlq_u64 shifts right when given a negative shift count.  */
+  int64x2_t neg_b = vdupq_n_s64(-b);
+  /* 64 - b in each lane.  */
+  int64x2_t left_shift = vsubq_s64(vdupq_n_s64(64), vdupq_n_s64(b));
+
+  uint64x2_t right_shifted = vshlq_u64(x, neg_b);
+  uint64x2_t left_shifted = vshlq_u64(x, left_shift);
+
+  return vorrq_u64(right_shifted, left_shifted);
+}
+
+void G(
+ int64_t* v,
+ int64x2_t& m1_01,
+ int64x2_t& m1_23,
+ int64x2_t& m2_01,
+ int64x2_t& m2_23
+) {
+  int64x2_t vd01 = {v[12], v[13]};
+  vd01 = veorq_s64(vd01, m1_01);
+  vd01 = vreinterpretq_s64_u64(rotr64_vec(vreinterpretq_u64_s64(vd01), 32));
+ v[12] = vgetq_lane_s64(vd01, 0);
+}
+
+/* { dg-final { scan-assembler {\txar\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d, 32\n} } } */
+
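(Not part of the patch.)  A scalar model of what each 64-bit lane of G above
computes, matching XAR's eor-then-rotate-right semantics; the helper name
xar64 is hypothetical.  Note that with the test's rotate amount of 32 the
internal left-rotate form is also 64 - 32 = 32, so the immediate the
scan-assembler directive looks for is again 32.

#include <stdint.h>

/* One 64-bit lane of XAR: exclusive-OR, then rotate right by IMM
   (0 < IMM < 64).  */
static uint64_t
xar64 (uint64_t a, uint64_t b, unsigned imm)
{
  uint64_t t = a ^ b;
  return (t >> imm) | (t << (64 - imm));
}

int
main (void)
{
  /* Rotating right by 32 swaps the 32-bit halves: sanity-check the model.  */
  return xar64 (0x0123456789abcdefULL, 0, 32) == 0x89abcdef01234567ULL
	 ? 0 : 1;
}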