git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Represent SQXTUN with RTL operations
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 7 Jun 2023 15:20:57 +0000 (16:20 +0100)
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 7 Jun 2023 15:20:57 +0000 (16:20 +0100)
This patch removes UNSPEC_SQXTUN and uses organic RTL codes to represent the operation.
SQXTUN is an odd one. It's described in the architecture as "Signed saturating extract Unsigned Narrow".
It is neither a straightforward ss_truncate nor a us_truncate.
Instead, it is a truncating signed clamp: the signed wide input is clamped to the range [0, unsigned maximum of the narrow mode] and the result is then truncated to the narrow mode:
(truncate:N
  (smin:M
    (smax:M (reg:M) (const_int 0))
    (const_int <unsigned-max-for-mode-N>)))

This patch implements these semantics. I've checked that the vqmovun tests in advsimd-intrinsics.exp
now get constant-folded and still pass validation, so I'm pretty confident in the semantics.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_sqmovun<mode><vczle><vczbe>):
Rename to...
(*aarch64_sqmovun<mode>_insn<vczle><vczbe>): ... This.  Reimplement
with RTL codes.
(aarch64_sqmovun<mode> [SD_HSDI]): Reimplement with RTL codes.
(aarch64_sqxtun2<mode>_le): Likewise.
(aarch64_sqxtun2<mode>_be): Likewise.
(aarch64_sqxtun2<mode>): Adjust for the above.
(aarch64_sqmovun<mode>): New define_expand.
* config/aarch64/iterators.md (UNSPEC_SQXTUN): Delete.
(half_mask): New mode attribute.
* config/aarch64/predicates.md (aarch64_simd_umax_half_mode):
New predicate.

gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/iterators.md
gcc/config/aarch64/predicates.md

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b23067c6754e6e4c82f86dda27499bc7a4b676c3..3cecc10f3e8877c0bbe4cbe9cef577a5691bdaac 100644 (file)
 
 (define_insn "aarch64_sqmovun<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-       (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
-                          UNSPEC_SQXTUN))]
+       (truncate:<VNARROWQ>
+         (smin:SD_HSDI
+           (smax:SD_HSDI
+             (match_operand:SD_HSDI 1 "register_operand" "w")
+             (const_int 0))
+           (const_int <half_mask>))))]
    "TARGET_SIMD"
    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_sqmovun<mode><vczle><vczbe>"
+(define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-       (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
-         UNSPEC_SQXTUN))]
+       (truncate:<VNARROWQ>
+         (smin:VQN
+           (smax:VQN (match_operand:VQN 1 "register_operand" "w")
+                     (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
+           (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
+(define_expand "aarch64_sqmovun<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+       (truncate:<VNARROWQ>
+         (smin:VQN
+           (smax:VQN (match_operand:VQN 1 "register_operand" "w")
+                     (match_dup 2))
+           (match_dup 3))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = CONST0_RTX (<MODE>mode);
+    operands[3]
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                       GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+  }
+)
+
 (define_insn "aarch64_sqxtun2<mode>_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
-         (unspec:<VNARROWQ>
-           [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
+         (truncate:<VNARROWQ>
+           (smin:VQN
+             (smax:VQN
+               (match_operand:VQN 2 "register_operand" "w")
+               (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
+             (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 (define_insn "aarch64_sqxtun2<mode>_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
-         (unspec:<VNARROWQ>
-           [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
+         (truncate:<VNARROWQ>
+           (smin:VQN
+             (smax:VQN
+               (match_operand:VQN 2 "register_operand" "w")
+               (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
+             (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
 (define_expand "aarch64_sqxtun2<mode>"
   [(match_operand:<VNARROWQ2> 0 "register_operand")
    (match_operand:<VNARROWQ> 1 "register_operand")
-   (unspec:<VNARROWQ>
-     [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
+   (match_operand:VQN 2 "register_operand")]
   "TARGET_SIMD"
   {
+    rtx zeros = CONST0_RTX (<MODE>mode);
+    rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                       GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
     if (BYTES_BIG_ENDIAN)
       emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
-                                             operands[2]));
+                                              operands[2], zeros, half_umax));
     else
       emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
-                                              operands[2]));
+                                              operands[2], zeros, half_umax));
     DONE;
   }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 9e1e17bc1b9c9e2fa488f3962841be8075673db6..56ce1251e80551905d577d951dfc76513069e261 100644 (file)
     UNSPEC_FMULX       ; Used in aarch64-simd.md.
     UNSPEC_USQADD      ; Used in aarch64-simd.md.
     UNSPEC_SUQADD      ; Used in aarch64-simd.md.
-    UNSPEC_SQXTUN      ; Used in aarch64-simd.md.
     UNSPEC_SSRA                ; Used in aarch64-simd.md.
     UNSPEC_USRA                ; Used in aarch64-simd.md.
     UNSPEC_SRSHR       ; Used in aarch64-simd.md.
 
 (define_mode_attr short_mask [(HI "65535") (QI "255")])
 
+(define_mode_attr half_mask [(HI "255") (SI "65535") (DI "4294967295")])
+
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d93fd86fa279e5c1010470ed837559f47291bea0..9391aba40c44f4e20a6075b5fa5b34681c9a3357 100644 (file)
                        GET_MODE_UNIT_BITSIZE (GET_MODE (op)) / 2,
                        GET_MODE_UNIT_BITSIZE (GET_MODE (op)) / 2)")))
 
+(define_predicate "aarch64_simd_umax_half_mode"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op,
+                               (HOST_WIDE_INT_1U
+                               << (GET_MODE_UNIT_BITSIZE  (mode) / 2)) - 1,
+                               (HOST_WIDE_INT_1U
+                               << (GET_MODE_UNIT_BITSIZE  (mode) / 2)) - 1)")))
+
 (define_predicate "aarch64_simd_shift_imm_vec_qi"
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))