From: Richard Sandiford Date: Wed, 10 Nov 2021 12:38:43 +0000 (+0000) Subject: aarch64: Tweak FMAX/FMIN iterators X-Git-Tag: basepoints/gcc-13~3194 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6d331688fcb69e9aae84bb94cb7cc54641a90ab6;p=thirdparty%2Fgcc.git aarch64: Tweak FMAX/FMIN iterators There was some duplication between the maxmin_uns (uns for unspec rather than unsigned) int attribute and the optab int attribute. The difficulty for FMAXNM and FMINNM is that the instructions really correspond to two things: the smax/smin optabs for floats (used only for fast-math-like flags) and the fmax/fmin optabs (used for built-in functions). The optab attribute was consistently for the former but maxmin_uns had a mixture of both. This patch renames maxmin_uns to fmaxmin and only uses it for the fmax and fmin optabs. The reductions that previously used the maxmin_uns attribute now use the optab attribute instead. FMAX and FMIN are awkward in that they don't correspond to any optab. It's nevertheless useful to define them alongside the “real” optabs. Previously they were known as “smax_nan” and “smin_nan”, but the problem with those names is that smax and smin are only used for floats if NaNs don't matter. This patch therefore uses fmax_nan and fmin_nan instead. There is still some inconsistency, in that the optab attribute handles UNSPEC_COND_FMAX but the fmaxmin attribute handles UNSPEC_FMAX. This is because the SVE FP instructions, being predicated, have to use unspecs in cases where the Advanced SIMD ones could use rtl codes. At least there are no duplicate entries though, so this seemed like the best compromise for now. gcc/ * config/aarch64/iterators.md (optab): Use fmax_nan instead of smax_nan and fmin_nan instead of smin_nan. (maxmin_uns): Rename to... (fmaxmin): ...this and make the same changes. Remove entries unrelated to fmax* and fmin*. * config/aarch64/aarch64.md (<maxmin_uns><mode>3): Rename to... (<fmaxmin><mode>3): ...this. 
* config/aarch64/aarch64-simd.md (aarch64_<maxmin_uns>p<mode>): Rename to... (aarch64_<optab>p<mode>): ...this. (<maxmin_uns><mode>3): Rename to... (<fmaxmin><mode>3): ...this. (reduc_<maxmin_uns>_scal_<mode>): Rename to... (reduc_<optab>_scal_<mode>): ...this and update gen* call. (aarch64_reduc_<maxmin_uns>_internal<mode>): Rename to... (aarch64_reduc_<optab>_internal<mode>): ...this. (aarch64_reduc_<maxmin_uns>_internalv2si): Rename to... (aarch64_reduc_<optab>_internalv2si): ...this. * config/aarch64/aarch64-sve.md (<maxmin_uns><mode>3): Rename to... (<fmaxmin><mode>3): ...this. * config/aarch64/aarch64-simd-builtins.def (smax_nan, smin_nan): Rename to... (fmax_nan, fmin_nan): ...this. * config/aarch64/arm_neon.h (vmax_f32, vmax_f64, vmaxq_f32, vmaxq_f64) (vmin_f32, vmin_f64, vminq_f32, vminq_f64, vmax_f16, vmaxq_f16) (vmin_f16, vminq_f16): Update accordingly. --- diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 4a7e2cf4125f..9b0a6eceafef 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -502,21 +502,19 @@ BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10, NONE) BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10, NONE) - /* Implemented by <maxmin_uns><mode>3. - smax variants map to fmaxnm, - smax_nan variants map to fmax. */ + /* Implemented by <optab><mode>3. */ BUILTIN_VDQ_BHSI (BINOP, smax, 3, NONE) BUILTIN_VDQ_BHSI (BINOP, smin, 3, NONE) BUILTIN_VDQ_BHSI (BINOP, umax, 3, NONE) BUILTIN_VDQ_BHSI (BINOP, umin, 3, NONE) - BUILTIN_VHSDF_DF (BINOP, smax_nan, 3, NONE) - BUILTIN_VHSDF_DF (BINOP, smin_nan, 3, NONE) - /* Implemented by <maxmin_uns><mode>3. */ + /* Implemented by <fmaxmin><mode>3. */ BUILTIN_VHSDF_HSDF (BINOP, fmax, 3, FP) BUILTIN_VHSDF_HSDF (BINOP, fmin, 3, FP) + BUILTIN_VHSDF_DF (BINOP, fmax_nan, 3, FP) + BUILTIN_VHSDF_DF (BINOP, fmin_nan, 3, FP) - /* Implemented by aarch64_<maxmin_uns>p<mode>. */ + /* Implemented by aarch64_<optab>p<mode>. 
*/ BUILTIN_VDQ_BHSI (BINOP, smaxp, 0, NONE) BUILTIN_VDQ_BHSI (BINOP, sminp, 0, NONE) BUILTIN_VDQ_BHSI (BINOP, umaxp, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bff76e4b6e97..35d55a3e51e7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1553,7 +1553,7 @@ }) ;; Pairwise Integer Max/Min operations. -(define_insn "aarch64_p" +(define_insn "aarch64_p" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") (match_operand:VDQ_BHSI 2 "register_operand" "w")] @@ -1564,7 +1564,7 @@ ) ;; Pairwise FP Max/Min operations. -(define_insn "aarch64_p" +(define_insn "aarch64_p" [(set (match_operand:VHSDF 0 "register_operand" "=w") (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") (match_operand:VHSDF 2 "register_operand" "w")] @@ -3488,7 +3488,7 @@ ;; Vector forms for fmax, fmin, fmaxnm, fminnm. ;; fmaxnm and fminnm are used for the fmax3 standard pattern names, ;; which implement the IEEE fmax ()/fmin () functions. -(define_insn "3" +(define_insn "3" [(set (match_operand:VHSDF 0 "register_operand" "=w") (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") (match_operand:VHSDF 2 "register_operand" "w")] @@ -3622,7 +3622,7 @@ ;; Template for outputting a scalar, so we can create __builtins which can be ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin). 
-(define_expand "reduc__scal_" +(define_expand "reduc__scal_" [(match_operand: 0 "register_operand") (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] FMAXMINV)] @@ -3630,15 +3630,15 @@ { rtx elt = aarch64_endian_lane_rtx (mode, 0); rtx scratch = gen_reg_rtx (mode); - emit_insn (gen_aarch64_reduc__internal (scratch, - operands[1])); + emit_insn (gen_aarch64_reduc__internal (scratch, + operands[1])); emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt)); DONE; } ) ;; Likewise for integer cases, signed and unsigned. -(define_expand "reduc__scal_" +(define_expand "reduc__scal_" [(match_operand: 0 "register_operand") (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] MAXMINV)] @@ -3646,14 +3646,14 @@ { rtx elt = aarch64_endian_lane_rtx (mode, 0); rtx scratch = gen_reg_rtx (mode); - emit_insn (gen_aarch64_reduc__internal (scratch, - operands[1])); + emit_insn (gen_aarch64_reduc__internal (scratch, + operands[1])); emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt)); DONE; } ) -(define_insn "aarch64_reduc__internal" +(define_insn "aarch64_reduc__internal" [(set (match_operand:VDQV_S 0 "register_operand" "=w") (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] MAXMINV))] @@ -3662,7 +3662,7 @@ [(set_attr "type" "neon_reduc_minmax")] ) -(define_insn "aarch64_reduc__internalv2si" +(define_insn "aarch64_reduc__internalv2si" [(set (match_operand:V2SI 0 "register_operand" "=w") (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] MAXMINV))] @@ -3671,7 +3671,7 @@ [(set_attr "type" "neon_reduc_minmax")] ) -(define_insn "aarch64_reduc__internal" +(define_insn "aarch64_reduc__internal" [(set (match_operand:VHSDF 0 "register_operand" "=w") (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] FMAXMINV))] diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 8fe4c721313e..5de479e141a1 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6288,7 
+6288,7 @@ ;; Unpredicated fmax/fmin (the libm functions). The optabs for the ;; smin/smax rtx codes are handled in the generic section above. -(define_expand "3" +(define_expand "3" [(set (match_operand:SVE_FULL_F 0 "register_operand") (unspec:SVE_FULL_F [(match_dup 3) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 4035e0617067..5297b2d3f957 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -6475,7 +6475,7 @@ ;; Scalar forms for fmax, fmin, fmaxnm, fminnm. ;; fmaxnm and fminnm are used for the fmax3 standard pattern names, ;; which implement the IEEE fmax ()/fmin () functions. -(define_insn "3" +(define_insn "3" [(set (match_operand:GPF_F16 0 "register_operand" "=w") (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w") (match_operand:GPF_F16 2 "register_operand" "w")] diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 398a2e3a021f..2e64f0798338 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -18264,7 +18264,7 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmax_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_smax_nanv2sf (__a, __b); + return __builtin_aarch64_fmax_nanv2sf (__a, __b); } __extension__ extern __inline float64x1_t @@ -18272,7 +18272,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmax_f64 (float64x1_t __a, float64x1_t __b) { return (float64x1_t) - { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0), + { __builtin_aarch64_fmax_nandf (vget_lane_f64 (__a, 0), vget_lane_f64 (__b, 0)) }; } @@ -18325,14 +18325,14 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_smax_nanv4sf (__a, __b); + return __builtin_aarch64_fmax_nanv4sf (__a, __b); } __extension__ extern __inline 
float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_smax_nanv2df (__a, __b); + return __builtin_aarch64_fmax_nanv2df (__a, __b); } __extension__ extern __inline int8x16_t @@ -19003,7 +19003,7 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmin_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_smin_nanv2sf (__a, __b); + return __builtin_aarch64_fmin_nanv2sf (__a, __b); } __extension__ extern __inline float64x1_t @@ -19011,7 +19011,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmin_f64 (float64x1_t __a, float64x1_t __b) { return (float64x1_t) - { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0), + { __builtin_aarch64_fmin_nandf (vget_lane_f64 (__a, 0), vget_lane_f64 (__b, 0)) }; } @@ -19064,14 +19064,14 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_smin_nanv4sf (__a, __b); + return __builtin_aarch64_fmin_nanv4sf (__a, __b); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_smin_nanv2df (__a, __b); + return __builtin_aarch64_fmin_nanv2df (__a, __b); } __extension__ extern __inline int8x16_t @@ -29131,14 +29131,14 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmax_f16 (float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_smax_nanv4hf (__a, __b); + return __builtin_aarch64_fmax_nanv4hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmaxq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_smax_nanv8hf (__a, __b); + 
return __builtin_aarch64_fmax_nanv8hf (__a, __b); } __extension__ extern __inline float16x4_t @@ -29159,14 +29159,14 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmin_f16 (float16x4_t __a, float16x4_t __b) { - return __builtin_aarch64_smin_nanv4hf (__a, __b); + return __builtin_aarch64_fmin_nanv4hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vminq_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_aarch64_smin_nanv8hf (__a, __b); + return __builtin_aarch64_fmin_nanv8hf (__a, __b); } __extension__ extern __inline float16x4_t diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index bdc8ba3576cf..e8eebd863a6d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3189,9 +3189,9 @@ (UNSPEC_COND_FCVTZS "fix_trunc") (UNSPEC_COND_FCVTZU "fixuns_trunc") (UNSPEC_COND_FDIV "div") - (UNSPEC_COND_FMAX "smax_nan") + (UNSPEC_COND_FMAX "fmax_nan") (UNSPEC_COND_FMAXNM "smax") - (UNSPEC_COND_FMIN "smin_nan") + (UNSPEC_COND_FMIN "fmin_nan") (UNSPEC_COND_FMINNM "smin") (UNSPEC_COND_FMLA "fma") (UNSPEC_COND_FMLS "fnma") @@ -3214,22 +3214,12 @@ (UNSPEC_COND_SCVTF "float") (UNSPEC_COND_UCVTF "floatuns")]) -(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") - (UNSPEC_UMINV "umin") - (UNSPEC_SMAXV "smax") - (UNSPEC_SMINV "smin") - (UNSPEC_FMAX "smax_nan") - (UNSPEC_FMAXNMV "smax") - (UNSPEC_FMAXV "smax_nan") - (UNSPEC_FMIN "smin_nan") - (UNSPEC_FMINNMV "smin") - (UNSPEC_FMINV "smin_nan") - (UNSPEC_FMAXNM "fmax") - (UNSPEC_FMINNM "fmin") - (UNSPEC_COND_FMAX "fmax_nan") - (UNSPEC_COND_FMAXNM "fmax") - (UNSPEC_COND_FMIN "fmin_nan") - (UNSPEC_COND_FMINNM "fmin")]) +(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan") + (UNSPEC_FMAXNM "fmax") + (UNSPEC_FMIN "fmin_nan") + (UNSPEC_FMINNM "fmin") + (UNSPEC_COND_FMAXNM "fmax") + (UNSPEC_COND_FMINNM "fmin")]) (define_int_attr 
maxmin_uns_op [(UNSPEC_UMAXV "umax") (UNSPEC_UMINV "umin")