git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: [US]Q(R)SHR(U)N scalar forms refactoring
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 6 Jun 2023 22:35:52 +0000 (23:35 +0100)
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Fri, 16 Jun 2023 12:52:23 +0000 (13:52 +0100)
Some instructions from the previous patch have scalar forms:
SQSHRN,SQRSHRN,UQSHRN,UQRSHRN,SQSHRUN,SQRSHRUN.
This patch converts the patterns for these to use standard RTL codes.
Their MD patterns deviate slightly from the vector forms mostly due to
things like operands being scalar rather than vectors.
One nuance is in the SQSHRUN,SQRSHRUN patterns. These end in a truncate
to the scalar narrow mode e.g. SI -> QI.  This gets simplified by the
RTL passes to a subreg rather than keeping it as a truncate.
So we end up representing these without the truncate and in the expander
read the narrow subreg in order to comply with the expected width of the
intrinsic.

Bootstrapped and tested on aarch64-none-linux-gnu and
aarch64_be-none-elf.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_<sur>q<r>shr<u>n_n<mode>):
Rename to...
(aarch64_<shrn_op>shrn_n<mode>): ... This.  Reimplement with RTL codes.
(*aarch64_<shrn_op>rshrn_n<mode>_insn): New define_insn.
(aarch64_sqrshrun_n<mode>_insn): Likewise.
(aarch64_sqshrun_n<mode>_insn): Likewise.
(aarch64_<shrn_op>rshrn_n<mode>): New define_expand.
(aarch64_sqshrun_n<mode>): Likewise.
(aarch64_sqrshrun_n<mode>): Likewise.
* config/aarch64/iterators.md (V2XWIDE): Add HI and SI modes.

gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/iterators.md

index 8b92981bebbb49b14c3ffeff923ce08eb7c6e817..bbb54344eb73f1f54877d6a8993a22332e9b9d7d 100644 (file)
 
 ;; vq(r)shr(u)n_n
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
+(define_insn "aarch64_<shrn_op>shrn_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
-                           (match_operand:SI 2
-                             "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
-                          VQSHRN_N))]
+       (SAT_TRUNC:<VNARROWQ>
+         (<TRUNC_SHIFT>:SD_HSDI
+           (match_operand:SD_HSDI 1 "register_operand" "w")
+           (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
   "TARGET_SIMD"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
 (define_insn "*aarch64_<shrn_op>shrn_n<mode>_insn<vczle><vczbe>"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+       (SAT_TRUNC:<VNARROWQ>
+         (<TRUNC_SHIFT>:<DWI>
+           (plus:<DWI>
+             (<TRUNCEXTEND>:<DWI>
+               (match_operand:SD_HSDI 1 "register_operand" "w"))
+             (match_operand:<DWI> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+           (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (SAT_TRUNC:<VNARROWQ>
+         (<TRUNC_SHIFT>:<V2XWIDE>
+           (plus:<V2XWIDE>
+             (<TRUNCEXTEND>:<V2XWIDE>
+               (match_operand:SD_HSDI 1 "register_operand"))
+             (match_dup 3))
+           (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD"
+  {
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+  }
+)
+
 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (ALL_TRUNC:<VNARROWQ>
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_sqshrun_n<mode>_insn"
+  [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
+       (smin:SD_HSDI
+         (smax:SD_HSDI
+           (ashiftrt:SD_HSDI
+             (match_operand:SD_HSDI 1 "register_operand" "w")
+             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+           (const_int 0))
+         (const_int <half_mask>)))]
+  "TARGET_SIMD"
+  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun_n<mode>"
+  [(match_operand:<VNARROWQ> 0 "register_operand")
+   (match_operand:SD_HSDI 1 "register_operand")
+   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+  "TARGET_SIMD"
+  {
+    rtx dst = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
+                                                operands[2]));
+    emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
+    DONE;
+  }
+)
+
 (define_expand "aarch64_sqshrun_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_sqrshrun_n<mode>_insn"
+  [(set (match_operand:<V2XWIDE> 0 "register_operand" "=w")
+       (smin:<V2XWIDE>
+         (smax:<V2XWIDE>
+           (ashiftrt:<V2XWIDE>
+             (plus:<V2XWIDE>
+               (sign_extend:<V2XWIDE>
+                 (match_operand:SD_HSDI 1 "register_operand" "w"))
+               (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+           (const_int 0))
+         (const_int <half_mask>)))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun_n<mode>"
+  [(match_operand:<VNARROWQ> 0 "register_operand")
+   (match_operand:SD_HSDI 1 "register_operand")
+   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+  "TARGET_SIMD"
+  {
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    rtx rnd = immed_wide_int_const (rnd_wi, <V2XWIDE>mode);
+    rtx dst = gen_reg_rtx (<V2XWIDE>mode);
+    emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
+    emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
+    DONE;
+  }
+)
+
 (define_expand "aarch64_sqrshrun_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
index e8c62c88b149aef9a8f56cfa592e20f29e4b3e17..acc7a3ec46ecd0ec47f4433e086b535fd33d854e 100644 (file)
 (define_mode_attr V2XWIDE [(V8QI "V8HI") (V4HI "V4SI")
                           (V16QI "V16HI") (V8HI "V8SI")
                           (V2SI "V2DI") (V4SI "V4DI")
-                          (V2DI "V2TI") (DI "TI")])
+                          (V2DI "V2TI") (DI "TI")
+                          (HI "SI") (SI "DI")])
 
 ;; Predicate mode associated with VWIDE.
 (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])