]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Tweak FMAX/FMIN iterators
authorRichard Sandiford <richard.sandiford@arm.com>
Wed, 10 Nov 2021 12:38:43 +0000 (12:38 +0000)
committerRichard Sandiford <richard.sandiford@arm.com>
Wed, 10 Nov 2021 12:38:43 +0000 (12:38 +0000)
There was some duplication between the maxmin_uns (uns for unspec
rather than unsigned) int attribute and the optab int attribute.
The difficulty for FMAXNM and FMINNM is that the instructions
really correspond to two things: the smax/smin optabs for floats
(used only for fast-math-like flags) and the fmax/fmin optabs
(used for built-in functions).  The optab attribute was
consistently for the former but maxmin_uns had a mixture of both.

This patch renames maxmin_uns to fmaxmin and only uses it
for the fmax and fmin optabs.  The reductions that previously
used the maxmin_uns attribute now use the optab attribute instead.

FMAX and FMIN are awkward in that they don't correspond to any
optab.  It's nevertheless useful to define them alongside the
“real” optabs.  Previously they were known as “smax_nan” and
“smin_nan”, but the problem with those names it that smax and
smin are only used for floats if NaNs don't matter.  This patch
therefore uses fmax_nan and fmin_nan instead.

There is still some inconsistency, in that the optab attribute
handles UNSPEC_COND_FMAX but the fmaxmin attribute handles
UNSPEC_FMAX.  This is because the SVE FP instructions, being
predicated, have to use unspecs in cases where the Advanced
SIMD ones could use rtl codes.

At least there are no duplicate entries though, so this seemed
like the best compromise for now.

gcc/
* config/aarch64/iterators.md (optab): Use fmax_nan instead of
smax_nan and fmin_nan instead of smin_nan.
(maxmin_uns): Rename to...
(fmaxmin): ...this and make the same changes.  Remove entries
unrelated to fmax* and fmin*.
* config/aarch64/aarch64.md (<maxmin_uns><mode>3): Rename to...
(<fmaxmin><mode>3): ...this.
* config/aarch64/aarch64-simd.md (aarch64_<maxmin_uns>p<mode>):
Rename to...
(aarch64_<optab>p<mode>): ...this.
(<maxmin_uns><mode>3): Rename to...
(<fmaxmin><mode>3): ...this.
(reduc_<maxmin_uns>_scal_<mode>): Rename to...
(reduc_<optab>_scal_<mode>): ...this and update gen* call.
(aarch64_reduc_<maxmin_uns>_internal<mode>): Rename to...
(aarch64_reduc_<optab>_internal<mode>): ...this.
(aarch64_reduc_<maxmin_uns>_internalv2si): Rename to...
(aarch64_reduc_<optab>_internalv2si): ...this.
* config/aarch64/aarch64-sve.md (<maxmin_uns><mode>3): Rename to...
(<fmaxmin><mode>3): ...this.
* config/aarch64/aarch64-simd-builtins.def (smax_nan, smin_nan)
Rename to...
(fmax_nan, fmin_nan): ...this.
* config/aarch64/arm_neon.h (vmax_f32, vmax_f64, vmaxq_f32, vmaxq_f64)
(vmin_f32, vmin_f64, vminq_f32, vminq_f64, vmax_f16, vmaxq_f16)
(vmin_f16, vminq_f16): Update accordingly.

gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/arm_neon.h
gcc/config/aarch64/iterators.md

index 4a7e2cf4125fe674dbb31c8f068b3b9970e9ea80..9b0a6eceafefee2aa6e3965fa5c038c7fae7db57 100644 (file)
   BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10, NONE)
   BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10, NONE)
 
-  /* Implemented by <maxmin_uns><mode>3.
-     smax variants map to fmaxnm,
-     smax_nan variants map to fmax.  */
+  /* Implemented by <optab><mode>3.  */
   BUILTIN_VDQ_BHSI (BINOP, smax, 3, NONE)
   BUILTIN_VDQ_BHSI (BINOP, smin, 3, NONE)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3, NONE)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3, NONE)
-  BUILTIN_VHSDF_DF (BINOP, smax_nan, 3, NONE)
-  BUILTIN_VHSDF_DF (BINOP, smin_nan, 3, NONE)
 
-  /* Implemented by <maxmin_uns><mode>3.  */
+  /* Implemented by <fmaxmin><mode>3.  */
   BUILTIN_VHSDF_HSDF (BINOP, fmax, 3, FP)
   BUILTIN_VHSDF_HSDF (BINOP, fmin, 3, FP)
+  BUILTIN_VHSDF_DF (BINOP, fmax_nan, 3, FP)
+  BUILTIN_VHSDF_DF (BINOP, fmin_nan, 3, FP)
 
-  /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
+  /* Implemented by aarch64_<optab>p<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0, NONE)
   BUILTIN_VDQ_BHSI (BINOP, sminp, 0, NONE)
   BUILTIN_VDQ_BHSI (BINOP, umaxp, 0, NONE)
index bff76e4b6e97db2613ab0ce1d721bf1828f0671b..35d55a3e51e794733156b8938cccb6c5151e32f4 100644 (file)
 })
 
 ;; Pairwise Integer Max/Min operations.
-(define_insn "aarch64_<maxmin_uns>p<mode>"
+(define_insn "aarch64_<optab>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")]
 )
 
 ;; Pairwise FP Max/Min operations.
-(define_insn "aarch64_<maxmin_uns>p<mode>"
+(define_insn "aarch64_<optab>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                      (match_operand:VHSDF 2 "register_operand" "w")]
 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
 ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<maxmin_uns><mode>3"
+(define_insn "<fmaxmin><mode>3"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                      (match_operand:VHSDF 2 "register_operand" "w")]
 
 ;; Template for outputting a scalar, so we can create __builtins which can be
 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
                  FMAXMINV)]
   {
     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
     rtx scratch = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
-                                                             operands[1]));
+    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
+                                                        operands[1]));
     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
     DONE;
   }
 )
 
 ;; Likewise for integer cases, signed and unsigned.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
                    MAXMINV)]
   {
     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
     rtx scratch = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
-                                                             operands[1]));
+    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
+                                                        operands[1]));
     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
     DONE;
   }
 )
 
-(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
                    MAXMINV))]
   [(set_attr "type" "neon_reduc_minmax<q>")]
 )
 
-(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
+(define_insn "aarch64_reduc_<optab>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
                    MAXMINV))]
   [(set_attr "type" "neon_reduc_minmax")]
 )
 
-(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
                      FMAXMINV))]
index 8fe4c721313e70592d2cf0acbfbe2f07b070b51a..5de479e141a1cc95ca659ac36b194f62d2630c39 100644 (file)
 
 ;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
 ;; smin/smax rtx codes are handled in the generic section above.
-(define_expand "<maxmin_uns><mode>3"
+(define_expand "<fmaxmin><mode>3"
   [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 3)
index 4035e061706793849c68ae09bcb2e4b9580ab7b6..5297b2d3f95744ac72e36814c6676cc97478d48b 100644 (file)
 ;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
 ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<maxmin_uns><mode>3"
+(define_insn "<fmaxmin><mode>3"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
        (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
                     (match_operand:GPF_F16 2 "register_operand" "w")]
index 398a2e3a021fc488519acf6b54ff114805340e8a..2e64f079833828b8659a8b1fa5a4c74efc8b1cbe 100644 (file)
@@ -18264,7 +18264,7 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smax_nanv2sf (__a, __b);
+  return __builtin_aarch64_fmax_nanv2sf (__a, __b);
 }
 
 __extension__ extern __inline float64x1_t
@@ -18272,7 +18272,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f64 (float64x1_t __a, float64x1_t __b)
 {
     return (float64x1_t)
-      { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
+      { __builtin_aarch64_fmax_nandf (vget_lane_f64 (__a, 0),
                                      vget_lane_f64 (__b, 0)) };
 }
 
@@ -18325,14 +18325,14 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smax_nanv4sf (__a, __b);
+  return __builtin_aarch64_fmax_nanv4sf (__a, __b);
 }
 
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smax_nanv2df (__a, __b);
+  return __builtin_aarch64_fmax_nanv2df (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
@@ -19003,7 +19003,7 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smin_nanv2sf (__a, __b);
+  return __builtin_aarch64_fmin_nanv2sf (__a, __b);
 }
 
 __extension__ extern __inline float64x1_t
@@ -19011,7 +19011,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f64 (float64x1_t __a, float64x1_t __b)
 {
     return (float64x1_t)
-         { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
+         { __builtin_aarch64_fmin_nandf (vget_lane_f64 (__a, 0),
                                          vget_lane_f64 (__b, 0)) };
 }
 
@@ -19064,14 +19064,14 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smin_nanv4sf (__a, __b);
+  return __builtin_aarch64_fmin_nanv4sf (__a, __b);
 }
 
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smin_nanv2df (__a, __b);
+  return __builtin_aarch64_fmin_nanv2df (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
@@ -29131,14 +29131,14 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f16 (float16x4_t __a, float16x4_t __b)
 {
-  return __builtin_aarch64_smax_nanv4hf (__a, __b);
+  return __builtin_aarch64_fmax_nanv4hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_aarch64_smax_nanv8hf (__a, __b);
+  return __builtin_aarch64_fmax_nanv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x4_t
@@ -29159,14 +29159,14 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f16 (float16x4_t __a, float16x4_t __b)
 {
-  return __builtin_aarch64_smin_nanv4hf (__a, __b);
+  return __builtin_aarch64_fmin_nanv4hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_aarch64_smin_nanv8hf (__a, __b);
+  return __builtin_aarch64_fmin_nanv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x4_t
index bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..e8eebd863a6db6f9ac5ec6d87e9ffbe5449da6f2 100644 (file)
                        (UNSPEC_COND_FCVTZS "fix_trunc")
                        (UNSPEC_COND_FCVTZU "fixuns_trunc")
                        (UNSPEC_COND_FDIV "div")
-                       (UNSPEC_COND_FMAX "smax_nan")
+                       (UNSPEC_COND_FMAX "fmax_nan")
                        (UNSPEC_COND_FMAXNM "smax")
-                       (UNSPEC_COND_FMIN "smin_nan")
+                       (UNSPEC_COND_FMIN "fmin_nan")
                        (UNSPEC_COND_FMINNM "smin")
                        (UNSPEC_COND_FMLA "fma")
                        (UNSPEC_COND_FMLS "fnma")
                        (UNSPEC_COND_SCVTF "float")
                        (UNSPEC_COND_UCVTF "floatuns")])
 
-(define_int_attr  maxmin_uns [(UNSPEC_UMAXV "umax")
-                             (UNSPEC_UMINV "umin")
-                             (UNSPEC_SMAXV "smax")
-                             (UNSPEC_SMINV "smin")
-                             (UNSPEC_FMAX  "smax_nan")
-                             (UNSPEC_FMAXNMV "smax")
-                             (UNSPEC_FMAXV "smax_nan")
-                             (UNSPEC_FMIN "smin_nan")
-                             (UNSPEC_FMINNMV "smin")
-                             (UNSPEC_FMINV "smin_nan")
-                             (UNSPEC_FMAXNM "fmax")
-                             (UNSPEC_FMINNM "fmin")
-                             (UNSPEC_COND_FMAX "fmax_nan")
-                             (UNSPEC_COND_FMAXNM "fmax")
-                             (UNSPEC_COND_FMIN "fmin_nan")
-                             (UNSPEC_COND_FMINNM "fmin")])
+(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
+                         (UNSPEC_FMAXNM "fmax")
+                         (UNSPEC_FMIN "fmin_nan")
+                         (UNSPEC_FMINNM "fmin")
+                         (UNSPEC_COND_FMAXNM "fmax")
+                         (UNSPEC_COND_FMINNM "fmin")])
 
 (define_int_attr  maxmin_uns_op [(UNSPEC_UMAXV "umax")
                                 (UNSPEC_UMINV "umin")