new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
LSHIFT_EXPR, args[0], args[1]);
break;
- /* lower saturating add/sub neon builtins to gimple. */
- BUILTIN_VSDQ_I (BINOP, ssadd, 3, DEFAULT)
- BUILTIN_VSDQ_I (BINOPU, usadd, 3, DEFAULT)
- new_stmt = gimple_build_call_internal (IFN_SAT_ADD, 2, args[0], args[1]);
- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
- break;
- BUILTIN_VSDQ_I (BINOP, sssub, 3, DEFAULT)
- BUILTIN_VSDQ_I (BINOPU, ussub, 3, DEFAULT)
- new_stmt = gimple_build_call_internal (IFN_SAT_SUB, 2, args[0], args[1]);
- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
- break;
-
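(The removed cases above rewrote the NEON saturating add/sub builtins into the IFN_SAT_ADD/IFN_SAT_SUB internal functions during gimple folding. As a minimal sketch, not part of the patch: the lowering fired for an intrinsic call such as the one below; after the revert the call expands directly through the renamed aarch64_sqaddv4si insn.)

/* Illustrative only: with the reverted lowering, this vqaddq_s32 call
   was folded to .SAT_ADD (__a, __b) in gimple.  */
#include <arm_neon.h>

int32x4_t
fold_example (int32x4_t __a, int32x4_t __b)
{
  return vqaddq_s32 (__a, __b);
}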
BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, DEFAULT)
BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, DEFAULT)
{
BUILTIN_VSDQ_I (BINOP, sqrshl, 0, DEFAULT)
BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0, DEFAULT)
/* Implemented by aarch64_<su_optab><optab><mode>. */
- BUILTIN_VSDQ_I (BINOP, ssadd, 3, DEFAULT)
- BUILTIN_VSDQ_I (BINOPU, usadd, 3, DEFAULT)
- BUILTIN_VSDQ_I (BINOP, sssub, 3, DEFAULT)
- BUILTIN_VSDQ_I (BINOPU, ussub, 3, DEFAULT)
+ BUILTIN_VSDQ_I (BINOP, sqadd, 0, DEFAULT)
+ BUILTIN_VSDQ_I (BINOPU, uqadd, 0, DEFAULT)
+ BUILTIN_VSDQ_I (BINOP, sqsub, 0, DEFAULT)
+ BUILTIN_VSDQ_I (BINOPU, uqsub, 0, DEFAULT)
/* Implemented by aarch64_<sur>qadd<mode>. */
BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0, DEFAULT)
BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, DEFAULT)
)
;; <su>q<addsub>
-(define_insn "<su_optab>s<addsub><mode>3<vczle><vczbe>"
- [(set (match_operand:VSDQ_I_QI_HI 0 "register_operand" "=w")
- (BINQOPS:VSDQ_I_QI_HI
- (match_operand:VSDQ_I_QI_HI 1 "register_operand" "w")
- (match_operand:VSDQ_I_QI_HI 2 "register_operand" "w")))]
+(define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
+ [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
+ (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
+ (match_operand:VSDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_q<addsub><q>")]
)
-(define_expand "<su_optab>s<addsub><mode>3"
- [(parallel
- [(set (match_operand:GPI 0 "register_operand")
- (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
- (match_operand:GPI 2 "aarch64_plus_operand")))
- (clobber (scratch:GPI))
- (clobber (reg:CC CC_REGNUM))])]
-)
-
-;; Introducing a temporary GP reg allows signed saturating arithmetic with GPR
-;; operands to be calculated without the use of costly transfers to and from FP
-;; registers. For example, saturating addition usually uses three FMOVs:
-;;
-;; fmov d0, x0
-;; fmov d1, x1
-;; sqadd d0, d0, d1
-;; fmov x0, d0
-;;
-;; Using a temporary register results in three cheaper instructions being used
-;; in place of the three FMOVs, which calculate the saturating limit accounting
-;; for the signedness of operand2:
-;;
-;; asr x2, x1, 63
-;; adds x0, x0, x1
-;; eor x2, x2, 0x8000000000000000
-;; csinv x0, x0, x2, vc
-;;
-;; If operand2 is a constant value, the temporary register can be used to store
-;; the saturating limit without the need for asr, xor to calculate said limit.
-
-(define_insn_and_split "aarch64_<su_optab>s<addsub><mode>3<vczle><vczbe>"
- [(set (match_operand:GPI 0 "register_operand")
- (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
- (match_operand:GPI 2 "aarch64_plus_operand")))
- (clobber (match_scratch:GPI 3))
- (clobber (reg:CC CC_REGNUM))]
- ""
- {@ [ cons: =0, 1 , 2 , =3 ; attrs: type , arch , length ]
- [ w , w , w , X ; neon_q<addsub><q> , simd , 4 ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
- [ r , r , JIr , &r ; * , * , 8 ] #
- }
- "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
- [(set (match_dup 0)
- (if_then_else:GPI
- (match_dup 4)
- (match_dup 5)
- (match_dup 6)))]
- {
- if (REG_P (operands[2]))
- {
- rtx shift_constant = gen_int_mode (GET_MODE_BITSIZE (<MODE>mode) - 1,
- <MODE>mode);
- auto limit = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1);
- rtx limit_constant = gen_int_mode (limit, <MODE>mode);
- emit_insn (gen_ashr<mode>3 (operands[3], operands[2], shift_constant));
- emit_insn (gen_xor<mode>3 (operands[3], operands[3], limit_constant));
-
- switch (<CODE>)
- {
- case SS_MINUS:
- emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
- operands[2]));
- break;
- case SS_PLUS:
- emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
- operands[2]));
- break;
- default:
- gcc_unreachable ();
- }
-
- rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
- switch (<CODE>)
- {
- case SS_PLUS:
- operands[4] = gen_rtx_NE (<MODE>mode, ccin, const0_rtx);
- operands[5] = gen_rtx_NOT (<MODE>mode, operands[3]);
- operands[6] = operands[0];
- break;
- case SS_MINUS:
- operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
- operands[5] = operands[0];
- operands[6] = operands[3];
- break;
- default:
- gcc_unreachable ();
- }
- }
- else
- {
- auto imm = INTVAL (operands[2]);
- rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
- wide_int limit;
-
- switch (<CODE>)
- {
- case SS_MINUS:
- emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
- operands[2], neg_imm));
- limit = imm >= 0 ? wi::min_value (<MODE>mode, SIGNED)
- : wi::max_value (<MODE>mode, SIGNED);
- break;
- case SS_PLUS:
- emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
- neg_imm, operands[2]));
- limit = imm >= 0 ? wi::max_value (<MODE>mode, SIGNED)
- : wi::min_value (<MODE>mode, SIGNED);
- break;
- default:
- gcc_unreachable ();
- }
-
- rtx sat_limit = immed_wide_int_const (limit, <MODE>mode);
- emit_insn (gen_rtx_SET (operands[3], sat_limit));
-
- rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
- operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
- operands[5] = operands[0];
- operands[6] = operands[3];
- }
- }
-)
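(For reference, the removed GPR splitter above implements standard signed saturating semantics; a C sketch of the addition case, illustrative only, mirroring the asr/eor/adds/csinv sequence described in the removed comment:)

#include <stdbool.h>
#include <stdint.h>

/* Illustrative C equivalent of the removed signed saturating-add
   splitter: operand 3 holds (b >> 31) ^ INT32_MIN, and csinv selects
   its bitwise NOT (the saturating limit) when the V flag is set.  */
static int32_t
ssadd32_ref (int32_t a, int32_t b)
{
  int32_t sum;
  bool overflow = __builtin_add_overflow (a, b, &sum);
  return !overflow ? sum : (b < 0 ? INT32_MIN : INT32_MAX);
}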
-
-;; Unsigned saturating arithmetic with GPR operands can be optimised similarly
-;; to the signed case, albeit without the need for a temporary register as the
-;; saturating limit can be inferred from the <addsub> code. This applies only
-;; to SImode and DImode.
-
-(define_insn_and_split "<su_optab>s<addsub><mode>3<vczle><vczbe>"
- [(set (match_operand:GPI 0 "register_operand")
- (UBINQOPS:GPI (match_operand:GPI 1 "register_operand")
- (match_operand:GPI 2 "aarch64_plus_operand")))
- (clobber (reg:CC CC_REGNUM))]
- ""
- {@ [ cons: =0, 1 , 2 ; attrs: type , arch , length ]
- [ w , w , w ; neon_q<addsub><q> , simd , 4 ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
- [ r , r , JIr ; * , * , 8 ] #
- }
- "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
- [(set (match_dup 0)
- (if_then_else:GPI
- (match_dup 3)
- (match_dup 0)
- (match_dup 4)))]
- {
-
- if (REG_P (operands[2]))
- {
- switch (<CODE>)
- {
- case US_MINUS:
- emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
- operands[2]));
- break;
- case US_PLUS:
- emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
- operands[2]));
- break;
- default:
- gcc_unreachable ();
- }
- }
- else
- {
- auto imm = UINTVAL (operands[2]);
- rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
- switch (<CODE>)
- {
- case US_MINUS:
- emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
- operands[2], neg_imm));
- break;
- case US_PLUS:
- emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
- neg_imm, operands[2]));
- break;
- default:
- gcc_unreachable ();
- }
- }
-
- rtx ccin = gen_rtx_REG (CCmode, CC_REGNUM);
- switch (<CODE>)
- {
- case US_PLUS:
- operands[3] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
- operands[4] = gen_int_mode (-1, <MODE>mode);
- break;
- case US_MINUS:
- operands[3] = gen_rtx_GEU (<MODE>mode, ccin, const0_rtx);
- operands[4] = const0_rtx;
- break;
- default:
- gcc_unreachable ();
- }
- }
-)
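(Similarly, the removed unsigned splitter needs no temporary register because the saturating limit is fixed by the operation; a C sketch, illustrative only:)

#include <stdint.h>

/* Illustrative C equivalents of the removed unsigned patterns:
   addition saturates to all-ones (adds; csinv wN, wN, wzr, cc) and
   subtraction saturates to zero (subs; csel wN, wN, wzr, cs).  */
static uint32_t
usadd32_ref (uint32_t a, uint32_t b)
{
  uint32_t sum = a + b;
  return sum < a ? UINT32_MAX : sum;
}

static uint32_t
ussub32_ref (uint32_t a, uint32_t b)
{
  return a >= b ? a - b : 0;
}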
-
;; suqadd and usqadd
(define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s8 (int8x8_t __a, int8x8_t __b)
{
- return (int8x8_t) __builtin_aarch64_ssaddv8qi (__a, __b);
+ return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
}
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s16 (int16x4_t __a, int16x4_t __b)
{
- return (int16x4_t) __builtin_aarch64_ssaddv4hi (__a, __b);
+ return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s32 (int32x2_t __a, int32x2_t __b)
{
- return (int32x2_t) __builtin_aarch64_ssaddv2si (__a, __b);
+ return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
}
__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_s64 (int64x1_t __a, int64x1_t __b)
{
- return (int64x1_t) {__builtin_aarch64_ssadddi (__a[0], __b[0])};
+ return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
}
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return __builtin_aarch64_usaddv8qi_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return __builtin_aarch64_usaddv4hi_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return __builtin_aarch64_usaddv2si_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
}
__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) {__builtin_aarch64_usadddi_uuu (__a[0], __b[0])};
+ return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s8 (int8x16_t __a, int8x16_t __b)
{
- return (int8x16_t) __builtin_aarch64_ssaddv16qi (__a, __b);
+ return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s16 (int16x8_t __a, int16x8_t __b)
{
- return (int16x8_t) __builtin_aarch64_ssaddv8hi (__a, __b);
+ return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s32 (int32x4_t __a, int32x4_t __b)
{
- return (int32x4_t) __builtin_aarch64_ssaddv4si (__a, __b);
+ return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
}
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_s64 (int64x2_t __a, int64x2_t __b)
{
- return (int64x2_t) __builtin_aarch64_ssaddv2di (__a, __b);
+ return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_aarch64_usaddv16qi_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_aarch64_usaddv8hi_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_aarch64_usaddv4si_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
}
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return __builtin_aarch64_usaddv2di_uuu (__a, __b);
+ return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
}
__extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s8 (int8x8_t __a, int8x8_t __b)
{
- return (int8x8_t) __builtin_aarch64_sssubv8qi (__a, __b);
+ return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
}
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s16 (int16x4_t __a, int16x4_t __b)
{
- return (int16x4_t) __builtin_aarch64_sssubv4hi (__a, __b);
+ return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s32 (int32x2_t __a, int32x2_t __b)
{
- return (int32x2_t) __builtin_aarch64_sssubv2si (__a, __b);
+ return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
}
__extension__ extern __inline int64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_s64 (int64x1_t __a, int64x1_t __b)
{
- return (int64x1_t) {__builtin_aarch64_sssubdi (__a[0], __b[0])};
+ return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
}
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return __builtin_aarch64_ussubv8qi_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return __builtin_aarch64_ussubv4hi_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return __builtin_aarch64_ussubv2si_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
}
__extension__ extern __inline uint64x1_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) {__builtin_aarch64_ussubdi_uuu (__a[0], __b[0])};
+ return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s8 (int8x16_t __a, int8x16_t __b)
{
- return (int8x16_t) __builtin_aarch64_sssubv16qi (__a, __b);
+ return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s16 (int16x8_t __a, int16x8_t __b)
{
- return (int16x8_t) __builtin_aarch64_sssubv8hi (__a, __b);
+ return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s32 (int32x4_t __a, int32x4_t __b)
{
- return (int32x4_t) __builtin_aarch64_sssubv4si (__a, __b);
+ return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
}
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_s64 (int64x2_t __a, int64x2_t __b)
{
- return (int64x2_t) __builtin_aarch64_sssubv2di (__a, __b);
+ return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_aarch64_ussubv16qi_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_aarch64_ussubv8hi_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_aarch64_ussubv4si_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
}
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return __builtin_aarch64_ussubv2di_uuu (__a, __b);
+ return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
}
__extension__ extern __inline int8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddb_s8 (int8_t __a, int8_t __b)
{
- return (int8_t) __builtin_aarch64_ssaddqi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
}
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddh_s16 (int16_t __a, int16_t __b)
{
- return (int16_t) __builtin_aarch64_ssaddhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
}
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadds_s32 (int32_t __a, int32_t __b)
{
- return (int32_t) __builtin_aarch64_ssaddsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
}
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddd_s64 (int64_t __a, int64_t __b)
{
- return __builtin_aarch64_ssadddi (__a, __b);
+ return __builtin_aarch64_sqadddi (__a, __b);
}
__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddb_u8 (uint8_t __a, uint8_t __b)
{
- return (uint8_t) __builtin_aarch64_usaddqi_uuu (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
}
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddh_u16 (uint16_t __a, uint16_t __b)
{
- return (uint16_t) __builtin_aarch64_usaddhi_uuu (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
}
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqadds_u32 (uint32_t __a, uint32_t __b)
{
- return (uint32_t) __builtin_aarch64_usaddsi_uuu (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
}
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqaddd_u64 (uint64_t __a, uint64_t __b)
{
- return __builtin_aarch64_usadddi_uuu (__a, __b);
+ return __builtin_aarch64_uqadddi_uuu (__a, __b);
}
/* vqdmlal */
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
- return (int8_t) __builtin_aarch64_sssubqi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}
__extension__ extern __inline int16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
- return (int16_t) __builtin_aarch64_sssubhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}
__extension__ extern __inline int32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
- return (int32_t) __builtin_aarch64_sssubsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}
__extension__ extern __inline int64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubd_s64 (int64_t __a, int64_t __b)
{
- return __builtin_aarch64_sssubdi (__a, __b);
+ return __builtin_aarch64_sqsubdi (__a, __b);
}
__extension__ extern __inline uint8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
- return (uint8_t) __builtin_aarch64_ussubqi_uuu (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
}
__extension__ extern __inline uint16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
- return (uint16_t) __builtin_aarch64_ussubhi_uuu (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
}
__extension__ extern __inline uint32_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
- return (uint32_t) __builtin_aarch64_ussubsi_uuu (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
}
__extension__ extern __inline uint64_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqsubd_u64 (uint64_t __a, uint64_t __b)
{
- return __builtin_aarch64_ussubdi_uuu (__a, __b);
+ return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}
/* vqtbl2 */
;; integer modes; 64-bit scalar integer mode.
(define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI])
-;; Advanced SIMD and scalar, 64 & 128-bit container; 8 and 16-bit scalar
-;; integer modes.
-(define_mode_iterator VSDQ_I_QI_HI [VDQ_I HI QI])
-
;; Double vector modes.
(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF V4BF])
+++ /dev/null
-/* Template file for vector saturating arithmetic validation.
-
- This file defines saturating addition and subtraction functions for a given
- scalar type, testing the auto-vectorization of these two operators. This
- type, along with the corresponding minimum and maximum values for that type,
- must be defined by any test file which includes this template file. */
-
-#ifndef SAT_ARIT_AUTOVEC_INC
-#define SAT_ARIT_AUTOVEC_INC
-
-#include <limits.h>
-#include <arm_neon.h>
-
-#ifndef UT
-#define UT unsigned int
-#define VT uint32x4_t
-#define UMAX UINT_MAX
-#define UMIN 0
-#endif
-
-
-UT uadd_lane (UT a, VT b)
-{
- UT sum = a + b[0];
- return sum < a ? UMAX : sum;
-}
-
-void uaddq (UT *out, UT *a, UT *b, int n)
-{
- for (int i = 0; i < n; i++)
- {
- UT sum = a[i] + b[i];
- out[i] = sum < a[i] ? UMAX : sum;
- }
-}
-
-void uaddq2 (UT *out, UT *a, UT *b, int n)
-{
- for (int i = 0; i < n; i++)
- {
- UT sum;
- if (!__builtin_add_overflow(a[i], b[i], &sum))
- out[i] = sum;
- else
- out[i] = UMAX;
- }
-}
-
-void usubq (UT *out, UT *a, UT *b, int n)
-{
- for (int i = 0; i < n; i++)
- {
- UT sum = a[i] - b[i];
- out[i] = sum > a[i] ? UMIN : sum;
- }
-}
-
-#endif
\ No newline at end of file
+++ /dev/null
-/* { dg-do assemble { target { aarch64*-*-* } } } */
-/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
-/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
-
-/*
-** uadd_lane: { xfail *-*-* }
-** dup\tv([0-9]+).8b, w0
-** uqadd\tb([0-9]+), (?:b\1, b0|b0, b\1)
-** umov\tw0, v\2.b\[0\]
-** ret
-*/
-/*
-** uaddq:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.16b, (?:v\1.16b, v\2.16b|v\2.16b, v\1.16b)
-** ...
-** ldr\td([0-9]+), .*
-** ldr\td([0-9]+), .*
-** uqadd\tv[0-9]+.8b, (?:v\3.8b, v\4.8b|v\4.8b, v\3.8b)
-** ...
-** ldr\tb([0-9]+), .*
-** ldr\tb([0-9]+), .*
-** uqadd\tb[0-9]+, (?:b\5, b\6|b\6, b\5)
-** ...
-** ldr\tb([0-9]+), .*
-** ldr\tb([0-9]+), .*
-** uqadd\tb[0-9]+, (?:b\7, b\8|b\8, b\7)
-** ...
-*/
-/*
-** uaddq2:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.16b, (?:v\1.16b, v\2.16b|v\2.16b, v\1.16b)
-** ...
-** ldr\td([0-9]+), .*
-** ldr\td([0-9]+), .*
-** uqadd\tv[0-9]+.8b, (?:v\3.8b, v\4.8b|v\4.8b, v\3.8b)
-** ...
-** ldr\tb([0-9]+), .*
-** ldr\tb([0-9]+), .*
-** uqadd\tb[0-9]+, (?:b\5, b\6|b\6, b\5)
-** ...
-** uqadd\tb([0-9]+), (?:b[0-9]+, b\7|b\7, b[0-9]+)
-** ...
-*/
-/*
-** usubq: { xfail *-*-* }
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqsub\tv[0-9]+.16b, v\1.16b, v\2.16b
-** ...
-** ldr\td([0-9]+), .*
-** ldr\td([0-9]+), .*
-** uqsub\tv[0-9]+.8b, v\3.8b, v\4.8b
-** ...
-** ldr\tb([0-9]+), .*
-** ldr\tb([0-9]+), .*
-** uqsub\tb[0-9]+, b\5, b\6
-** ...
-** ldr\tb([0-9]+), .*
-** ldr\tb([0-9]+), .*
-** uqsub\tb[0-9]+, b\7, b\8
-** ...
-*/
-
-#include <limits.h>
-#include <arm_neon.h>
-
-#define UT unsigned char
-#define VT uint8x8_t
-#define UMAX UCHAR_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic_autovect.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do assemble { target { aarch64*-*-* } } } */
-/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
-/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
-
-/*
-** uadd_lane: { xfail *-*-* }
-** dup\tv([0-9]+).4h, w0
-** uqadd\th([0-9]+), (?:h\1, h0|h0, h\1)
-** umov\tw0, v\2.h\[0\]
-** ret
-*/
-/*
-** uaddq:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.8h, (?:v\1.8h, v\2.8h|v\2.8h, v\1.8h)
-** ...
-** ldr\td([0-9]+), .*
-** ldr\td([0-9]+), .*
-** uqadd\tv[0-9]+.4h, (?:v\3.4h, v\4.4h|v\4.4h, v\3.4h)
-** ...
-** ldr\th([0-9]+), .*
-** ldr\th([0-9]+), .*
-** uqadd\th[0-9]+, (?:h\5, h\6|h\6, h\5)
-** ...
-** ldr\th([0-9]+), .*
-** ldr\th([0-9]+), .*
-** uqadd\th[0-9]+, (?:h\7, h\8|h\8, h\7)
-** ...
-*/
-/*
-** uaddq2:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.8h, (?:v\1.8h, v\2.8h|v\2.8h, v\1.8h)
-** ...
-** ldr\td([0-9]+), .*
-** ldr\td([0-9]+), .*
-** uqadd\tv[0-9]+.4h, (?:v\3.4h, v\4.4h|v\4.4h, v\3.4h)
-** ...
-** ldr\th([0-9]+), .*
-** ldr\th([0-9]+), .*
-** uqadd\th[0-9]+, (?:h\5, h\6|h\6, h\5)
-** ...
-** uqadd\th([0-9]+), (?:h[0-9]+, h\7|h\7, h[0-9]+)
-** ...
-*/
-/*
-** usubq: { xfail *-*-* }
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqsub\tv[0-9]+.8h, v\1.8h, v\2.8h
-** ...
-** ldr\td([0-9]+), .*
-** ldr\td([0-9]+), .*
-** uqsub\tv[0-9]+.4h, v\3.4h, v\4.4h
-** ...
-** ldr\th([0-9]+), .*
-** ldr\th([0-9]+), .*
-** uqsub\th[0-9]+, h\5, h\6
-** ...
-** ldr\th([0-9]+), .*
-** ldr\th([0-9]+), .*
-** uqsub\th[0-9]+, h\7, h\8
-** ...
-*/
-
-#include <limits.h>
-#include <arm_neon.h>
-
-#define UT unsigned short
-#define VT uint16x4_t
-#define UMAX USHRT_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic_autovect.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do assemble { target { aarch64*-*-* } } } */
-/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
-/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
-
-/*
-** uadd_lane:
-** fmov\tw([0-9]+), s0
-** adds\tw([0-9]+), (?:w\1, w0|w0, w\1)
-** csinv\tw\2, w\2, wzr, cc
-** ret
-*/
-/*
-** uaddq:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.4s, (?:v\1.4s, v\2.4s|v\2.4s, v\1.4s)
-** ...
-** ldr\tw([0-9]+), .*
-** ldr\tw([0-9]+), .*
-** adds\tw([0-9]+), (?:w\3, w\4|w\4, w\3)
-** csinv\tw\5, w\5, wzr, cc
-** ...
-** ldr\tw([0-9]+), .*
-** ldr\tw([0-9]+), .*
-** adds\tw([0-9]+), (?:w\6, w\7|w\7, w\6)
-** csinv\tw\8, w\8, wzr, cc
-** ...
-*/
-/*
-** uaddq2:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.4s, (?:v\1.4s, v\2.4s|v\2.4s, v\1.4s)
-** ...
-** ldr\tw([0-9]+), .*
-** ldr\tw([0-9]+), .*
-** adds\tw([0-9]+), (?:w\3, w\4|w\4, w\3)
-** csinv\tw\5, w\5, wzr, cc
-** ...
-** ldr\tw([0-9]+), .*
-** ldr\tw([0-9]+), .*
-** adds\tw([0-9]+), (?:w\6, w\7|w\7, w\6)
-** csinv\tw\8, w\8, wzr, cc
-** ...
-*/
-/*
-** usubq: { xfail *-*-* }
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqsub\tv[0-9]+.4s, v\1.4s, v\2.4s
-** ...
-** ldr\tw([0-9]+), .*
-** ldr\tw([0-9]+), .*
-** subs\tw([0-9]+), w\3, w\4
-** csel\tw\5, w\5, wzr, cs
-** ...
-** ldr\tw([0-9]+), .*
-** ldr\tw([0-9]+), .*
-** subs\tw([0-9]+), w\6, w\7
-** csel\tw\8, w\8, wzr, cs
-** ...
-*/
-
-#include <limits.h>
-#include <arm_neon.h>
-
-#define UT unsigned int
-#define VT uint32x2_t
-#define UMAX UINT_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic_autovect.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do assemble { target { aarch64*-*-* } } } */
-/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
-/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
-
-/*
-** uadd_lane:
-** ...
-** (?:fmov|ldr)\tx([0-9]+), .*
-** ...
-** adds\tx([0-9]+), (?:x\1, x0|x0, x\1)
-** csinv\tx\2, x\2, xzr, cc
-** ret
-*/
-/*
-** uaddq:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.2d, (?:v\1.2d, v\2.2d|v\2.2d, v\1.2d)
-** ...
-** ldr\tx([0-9]+), .*
-** ldr\tx([0-9]+), .*
-** adds\tx([0-9]+), (?:x\3, x\4|x\4, x\3)
-** csinv\tx\5, x\5, xzr, cc
-** ...
-** ldr\tx([0-9]+), .*
-** ldr\tx([0-9]+), .*
-** adds\tx([0-9]+), (?:x\6, x\7|x\7, x\6)
-** csinv\tx\8, x\8, xzr, cc
-** ...
-*/
-/*
-** uaddq2:
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqadd\tv[0-9]+.2d, (?:v\1.2d, v\2.2d|v\2.2d, v\1.2d)
-** ...
-** ldr\tx([0-9]+), .*
-** ldr\tx([0-9]+), .*
-** adds\tx([0-9]+), (?:x\3, x\4|x\4, x\3)
-** csinv\tx\5, x\5, xzr, cc
-** ...
-** ldr\tx([0-9]+), .*
-** ldr\tx([0-9]+), .*
-** adds\tx([0-9]+), (?:x\6, x\7|x\7, x\6)
-** csinv\tx\8, x\8, xzr, cc
-** ...
-*/
-/*
-** usubq: { xfail *-*-* }
-** ...
-** ldr\tq([0-9]+), .*
-** ldr\tq([0-9]+), .*
-** uqsub\tv[0-9]+.2d, v\1.2d, v\2.2d
-** ...
-** ldr\tx([0-9]+), .*
-** ldr\tx([0-9]+), .*
-** subs\tx([0-9]+), x\3, x\4
-** csel\tx\5, x\5, xzr, cs
-** ...
-** ldr\tx([0-9]+), .*
-** ldr\tx([0-9]+), .*
-** subs\tx([0-9]+), x\6, x\7
-** csel\tx\8, x\8, xzr, cs
-** ...
-*/
-
-#include <limits.h>
-#include <arm_neon.h>
-
-#define UT unsigned long
-#define VT uint64x2_t
-#define UMAX ULONG_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic_autovect.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do run } */
-/* { dg-options "-O2 --save-temps -mearly-ra=none -fno-schedule-insns2" } */
-/* { dg-final { check-function-bodies "**" "" "" } } */
-
-#include <limits.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-/*
-** sadd32:
-** asr w([0-9]+), w1, 31
-** eor w\1, w\1, -2147483648
-** adds w([0-9]+), (?:w0, w1|w1, w0)
-** csinv w0, w\2, w\1, vc
-** ret
-*/
-int32_t __attribute__((noipa))
-sadd32 (int32_t __a, int32_t __b)
-{
- int32_t sum;
- bool overflow = __builtin_add_overflow (__a, __b, &sum);
- return !overflow ? sum : __a < 0 ? INT_MIN : INT_MAX;
-}
-
-/*
-** sadd32_imm:
-** adds w([0-9]+), w0, #67
-** mov w([0-9]+), 2147483647
-** csel w0, w\1, w\2, vc
-** ret
-*/
-int32_t __attribute__((noipa))
-sadd32_imm (int32_t __a)
-{
- int32_t sum;
- bool overflow = __builtin_add_overflow (__a, 67, &sum);
- return !overflow ? sum : __a < 0 ? INT_MIN : INT_MAX;
-}
-
-/*
-** sadd32_imm2:
-** subs w([0-9]+), w0, 67
-** mov w([0-9]+), -2147483648
-** csel w0, w\1, w\2, vc
-** ret
-*/
-int32_t __attribute__((noipa))
-sadd32_imm2 (int32_t __a)
-{
- int32_t sum;
- bool overflow = __builtin_add_overflow (__a, -67, &sum);
- return !overflow ? sum : __a < 0 ? INT_MIN : INT_MAX;
-}
-
-/*
-** ssub32:
-** asr w([0-9]+), w1, 31
-** eor w\1, w\1, -2147483648
-** subs w([0-9]+), w0, w1
-** csel w0, w\2, w\1, vc
-** ret
-*/
-int32_t __attribute__((noipa))
-ssub32 (int32_t __a, int32_t __b)
-{
- int32_t result;
- bool overflow = __builtin_sub_overflow (__a, __b, &result);
- return !overflow ? result : __a < 0 ? INT_MIN : INT_MAX;
-}
-
-/*
-** ssub32_imm:
-** subs w([0-9]+), w0, 67
-** mov w([0-9]+), -2147483648
-** csel w0, w\1, w\2, vc
-** ret
-*/
-int32_t __attribute__((noipa))
-ssub32_imm (int32_t __a)
-{
- int32_t result;
- bool overflow = __builtin_sub_overflow (__a, 67, &result);
- return !overflow ? result : __a < 0 ? INT_MIN : INT_MAX;
-}
-
-/*
-** ssub32_imm2:
-** adds w([0-9]+), w0, #67
-** mov w([0-9]+), 2147483647
-** csel w0, w\1, w\2, vc
-** ret
-*/
-int32_t __attribute__((noipa))
-ssub32_imm2 (int32_t __a)
-{
- int32_t result;
- bool overflow = __builtin_sub_overflow (__a, -67, &result);
- return !overflow ? result : __a < 0 ? INT_MIN : INT_MAX;
-}
-
-/*
-** sadd64:
-** asr x([0-9]+), x1, 63
-** eor x\1, x\1, -9223372036854775808
-** adds x([0-9]+), (?:x0, x1|x1, x0)
-** csinv x0, x\2, x\1, vc
-** ret
-*/
-int64_t __attribute__((noipa))
-sadd64 (int64_t __a, int64_t __b)
-{
- int64_t sum;
- bool overflow = __builtin_add_overflow (__a, __b, &sum);
- return !overflow ? sum : __a < 0 ? LONG_MIN : LONG_MAX;
-}
-
-/*
-** sadd64_imm:
-** adds x([0-9]+), x0, #67
-** mov x([0-9]+), 9223372036854775807
-** csel x0, x\1, x\2, vc
-** ret
-*/
-int64_t __attribute__((noipa))
-sadd64_imm (int64_t __a)
-{
- int64_t sum;
- bool overflow = __builtin_add_overflow (__a, (int64_t)67, &sum);
- return !overflow ? sum : __a < 0 ? LONG_MIN : LONG_MAX;
-}
-
-/*
-** sadd64_imm2:
-** subs x([0-9]+), x0, 67
-** mov x([0-9]+), -9223372036854775808
-** csel x0, x\1, x\2, vc
-** ret
-*/
-int64_t __attribute__((noipa))
-sadd64_imm2 (int64_t __a)
-{
- int64_t sum;
- bool overflow = __builtin_add_overflow (__a, (int64_t)-67, &sum);
- return !overflow ? sum : __a < 0 ? LONG_MIN : LONG_MAX;
-}
-
-/*
-** ssub64:
-** asr x([0-9]+), x1, 63
-** eor x\1, x\1, -9223372036854775808
-** subs x([0-9]+), x0, x1
-** csel x0, x\2, x\1, vc
-** ret
-*/
-int64_t __attribute__((noipa))
-ssub64 (int64_t __a, int64_t __b)
-{
- int64_t result;
- bool overflow = __builtin_sub_overflow (__a, __b, &result);
- return !overflow ? result : __a < 0 ? LONG_MIN : LONG_MAX;
-}
-
-/*
-** ssub64_imm:
-** subs x([0-9]+), x0, 67
-** mov x([0-9]+), -9223372036854775808
-** csel x0, x\1, x\2, vc
-** ret
-*/
-int64_t __attribute__((noipa))
-ssub64_imm (int64_t __a)
-{
- int64_t result;
- bool overflow = __builtin_sub_overflow (__a, (int64_t) 67, &result);
- return !overflow ? result : __a < 0 ? LONG_MIN : LONG_MAX;
-}
-
-/*
-** ssub64_imm2:
-** adds x([0-9]+), x0, #67
-** mov x([0-9]+), 9223372036854775807
-** csel x0, x\1, x\2, vc
-** ret
-*/
-int64_t __attribute__((noipa))
-ssub64_imm2 (int64_t __a)
-{
- int64_t result;
- bool overflow = __builtin_sub_overflow (__a, (int64_t) -67, &result);
- return !overflow ? result : __a < 0 ? LONG_MIN : LONG_MAX;
-}
-
-int
-main (void)
-{
- /* Addition:
- SAT_ADD(x, +ve), non-saturating
- SAT_ADD(x, +ve), saturating
- SAT_ADD(x, immediate +ve)
- SAT_ADD(x, immediate -ve)
- SAT_ADD(x, -ve), non-saturating
- SAT_ADD(x, -ve), saturating
-
- Subtraction:
- SAT_SUB(x, +ve), non-saturating
- SAT_SUB(x, +ve), saturating
- SAT_SUB(x, immediate +ve)
- SAT_SUB(x, immediate -ve)
- SAT_SUB(x, -ve), non-saturating */
-
- int32_t a = 4;
- int32_t b = 70;
- int32_t c = 2147483647;
- int32_t d = (int32_t) -2147483648;
-
- if (sadd32 (a, b) != (a + b))
- __builtin_abort ();
- if (sadd32 (a, c) != c)
- __builtin_abort ();
- if (sadd32_imm (a) != (a + 67))
- __builtin_abort ();
- if (sadd32_imm2 (a) != (a - 67))
- __builtin_abort ();
- if (sadd32 (a, -b) != (a - b))
- __builtin_abort ();
- if (sadd32 (a, d) != (d + 4))
- __builtin_abort ();
-
- if (ssub32 (a, b) != (a - b))
- __builtin_abort ();
- if (ssub32 (-a, c) != d)
- __builtin_abort ();
- if (ssub32_imm (a) != (a - 67))
- __builtin_abort ();
- if (ssub32_imm2 (a) != (a + 67))
- __builtin_abort ();
- if (ssub32 (a, -b) != (a + b))
- __builtin_abort ();
-
- int64_t a_64 = a;
- int64_t b_64 = b;
- int64_t c_64 = (int64_t) 9223372036854775807;
- int64_t d_64 = (int64_t) 0x8000000000000000;
-
- if (sadd64 (a_64, b_64) != (a_64 + b_64))
- __builtin_abort ();
- if (sadd64 (a_64, c_64) != c_64)
- __builtin_abort ();
- if (sadd64_imm (a_64) != (a_64 + 67))
- __builtin_abort ();
- if (sadd64_imm2 (a_64) != (a_64 - 67))
- __builtin_abort ();
- if (sadd64 (a_64, -b_64) != (a_64 - b_64))
- __builtin_abort ();
- if (sadd64 (a_64, d_64) != (d_64 + 4))
- __builtin_abort ();
-
- if (ssub64 (a_64, b_64) != (a_64 - b_64))
- __builtin_abort ();
- if (ssub64 (-a_64, c_64) != d_64)
- __builtin_abort ();
- if (ssub64_imm (a_64) != (a_64 - 67))
- __builtin_abort ();
- if (ssub64_imm2 (a_64) != (a_64 + 67))
- __builtin_abort ();
- if (ssub64 (a_64, -b_64) != (a_64 + b_64))
- __builtin_abort ();
-
- return 0;
-}
\ No newline at end of file
+++ /dev/null
-/* Template file for scalar saturating arithmetic validation.
-
- This file defines scalar saturating addition and subtraction functions for a
- given type. This type, along with the corresponding minimum and maximum
- values for that type, must be defined by any test file which includes this
- template file. */
-
-#ifndef SAT_ARIT_INC
-#define SAT_ARIT_INC
-
-#include <limits.h>
-
-#ifndef UT
-#define UT unsigned int
-#define UMAX UINT_MAX
-#define UMIN 0
-#endif
-
-UT uadd (UT a, UT b)
-{
- UT sum = a + b;
- return sum < a ? UMAX : sum;
-}
-
-UT uadd2 (UT a, UT b)
-{
- UT c;
- if (!__builtin_add_overflow(a, b, &c))
- return c;
- return UMAX;
-}
-
-UT usub (UT a, UT b)
-{
- UT sum = a - b;
- return sum > a ? UMIN : sum;
-}
-
-#endif
\ No newline at end of file
+++ /dev/null
-/* { dg-do-compile } */
-/* { dg-options "-O2 --save-temps -fno-schedule-insns2" } */
-/* { dg-final { check-function-bodies "**" "" "" } } */
-
-/*
-** uadd:
-** dup v([0-9]+).8b, w1
-** dup v([0-9]+).8b, w0
-** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2)
-** umov w0, v\3.b\[0\]
-** ret
-*/
-/*
-** uadd2:
-** dup v([0-9]+).8b, w1
-** dup v([0-9]+).8b, w0
-** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2)
-** umov w0, v\3.b\[0\]
-** ret
-*/
-/*
-** usub: { xfail *-*-* }
-** dup v([0-9]+).8b, w1
-** dup v([0-9]+).8b, w0
-** uqsub b([0-9]+), b\1, b\2
-** umov w0, v\3.b\[0\]
-** ret
-*/
-
-#include <limits.h>
-
-#define UT unsigned char
-#define UMAX UCHAR_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do-compile } */
-/* { dg-options "-O2 --save-temps -fno-schedule-insns2" } */
-/* { dg-final { check-function-bodies "**" "" "" } } */
-
-/*
-** uadd:
-** dup v([0-9]+).4h, w1
-** dup v([0-9]+).4h, w0
-** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2)
-** umov w0, v\3.h\[0\]
-** ret
-*/
-/*
-** uadd2:
-** dup v([0-9]+).4h, w1
-** dup v([0-9]+).4h, w0
-** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2)
-** umov w0, v\3.h\[0\]
-** ret
-*/
-/*
-** usub: { xfail *-*-* }
-** dup v([0-9]+).4h, w1
-** dup v([0-9]+).4h, w0
-** uqsub h([0-9]+), h\1, h\2
-** umov w0, v\3.h\[0\]
-** ret
-*/
-
-#include <limits.h>
-
-#define UT unsigned short
-#define UMAX USHRT_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do compile { target { aarch64*-*-* } } } */
-/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
-/* { dg-final { check-function-bodies "**" "" "" } } */
-
-/*
-** uadd:
-** adds\tw([0-9]+), w([0-9]+), w([0-9]+)
-** csinv\tw\1, w\1, wzr, cc
-** ret
-*/
-/*
-** uadd2:
-** adds\tw([0-9]+), w([0-9]+), w([0-9]+)
-** csinv\tw\1, w\1, wzr, cc
-** ret
-*/
-/*
-** usub:
-** subs\tw([0-9]+), w([0-9]+), w([0-9]+)
-** csel\tw\1, w\1, wzr, cs
-** ret
-*/
-
-#include <limits.h>
-
-#define UT unsigned int
-#define UMAX UINT_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic.inc"
\ No newline at end of file
+++ /dev/null
-/* { dg-do compile { target { aarch64*-*-* } } } */
-/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
-/* { dg-final { check-function-bodies "**" "" "" } } */
-
-/*
-** uadd:
-** adds\tx([0-9]+), x([0-9]+), x([0-9]+)
-** csinv\tx\1, x\1, xzr, cc
-** ret
-*/
-/*
-** uadd2:
-** adds\tx([0-9]+), x([0-9]+), x([0-9]+)
-** csinv\tx\1, x\1, xzr, cc
-** ret
-*/
-/*
-** usub:
-** subs\tx([0-9]+), x([0-9]+), x([0-9]+)
-** csel\tx\1, x\1, xzr, cs
-** ret
-*/
-
-#include <limits.h>
-
-#define UT unsigned long
-#define UMAX ULONG_MAX
-#define UMIN 0
-
-#include "saturating_arithmetic.inc"
\ No newline at end of file
/* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */
uint64_t
-test_vqaddd_u64 (uint64x1_t a, uint64x1_t b)
+test_vqaddd_u64 (uint64_t a, uint64_t b)
{
- return vqaddd_u64 (a[0], b[0]);
+ return vqaddd_u64 (a, b);
}
/* { dg-final { scan-assembler-times "\\tuqadd\\ts\[0-9\]+" 1 } } */
uint32_t
-test_vqadds_u32 (uint32x4_t a, uint32x4_t b)
+test_vqadds_u32 (uint32_t a, uint32_t b)
{
- return vqadds_u32 (a[0], b[0]);
+ return vqadds_u32 (a, b);
}
/* { dg-final { scan-assembler-times "\\tuqadd\\th\[0-9\]+" 1 } } */
uint16_t
-test_vqaddh_u16 (uint16x8_t a, uint16x8_t b)
+test_vqaddh_u16 (uint16_t a, uint16_t b)
{
- return vqaddh_u16 (a[0], b[0]);
+ return vqaddh_u16 (a, b);
}
/* { dg-final { scan-assembler-times "\\tuqadd\\tb\[0-9\]+" 1 } } */
uint8_t
-test_vqaddb_u8 (uint8x16_t a, uint8x16_t b)
+test_vqaddb_u8 (uint8_t a, uint8_t b)
{
- return vqaddb_u8 (a[0], b[0]);
+ return vqaddb_u8 (a, b);
}
/* { dg-final { scan-assembler-times "\\tsqadd\\td\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "\\tuqsub\\td\[0-9\]+" 1 } } */
uint64_t
-test_vqsubd_u64 (uint64x1_t a, uint64x1_t b)
+test_vqsubd_u64 (uint64_t a, uint64_t b)
{
- return vqsubd_u64 (a[0], b[0]);
+ return vqsubd_u64 (a, b);
}
/* { dg-final { scan-assembler-times "\\tuqsub\\ts\[0-9\]+" 1 } } */
uint32_t
-test_vqsubs_u32 (uint32x4_t a, uint32x4_t b)
+test_vqsubs_u32 (uint32_t a, uint32_t b)
{
- return vqsubs_u32 (a[0], b[0]);
+ return vqsubs_u32 (a, b);
}
/* { dg-final { scan-assembler-times "\\tuqsub\\th\[0-9\]+" 1 } } */
uint16_t
-test_vqsubh_u16 (uint16x8_t a, uint16x8_t b)
+test_vqsubh_u16 (uint16_t a, uint16_t b)
{
- return vqsubh_u16 (a[0], b[0]);
+ return vqsubh_u16 (a, b);
}
/* { dg-final { scan-assembler-times "\\tuqsub\\tb\[0-9\]+" 1 } } */
uint8_t
-test_vqsubb_u8 (uint8x16_t a, uint8x16_t b)
+test_vqsubb_u8 (uint8_t a, uint8_t b)
{
- return vqsubb_u8 (a[0], b[0]);
+ return vqsubb_u8 (a, b);
}
/* { dg-final { scan-assembler-times "\\tsqsub\\td\[0-9\]+" 1 } } */