git.ipfire.org Git - thirdparty/gcc.git/commitdiff
simplify-rtx: Push sign/zero-extension inside vec_duplicate
author Jonathan Wright <jonathan.wright@arm.com>
Fri, 16 Jul 2021 14:34:38 +0000 (15:34 +0100)
committer Jonathan Wright <jonathan.wright@arm.com>
Tue, 27 Jul 2021 09:42:33 +0000 (10:42 +0100)
As a general principle, vec_duplicate should be as close to the root
of an expression as possible. Where unary operations have
vec_duplicate as an argument, these operations should be pushed
inside the vec_duplicate.

This patch modifies unary operation simplification to push
sign/zero-extension of a scalar inside vec_duplicate.

This patch also updates all RTL patterns in aarch64-simd.md to use
the new canonical form.

gcc/ChangeLog:

2021-07-19  Jonathan Wright  <jonathan.wright@arm.com>

* config/aarch64/aarch64-simd.md: Push sign/zero-extension
inside vec_duplicate for all patterns.
* simplify-rtx.c (simplify_context::simplify_unary_operation_1):
Push sign/zero-extension inside vec_duplicate.

gcc/config/aarch64/aarch64-simd.md
gcc/simplify-rtx.c

index 13c86984df147f2033b81a2a5278252f5ac52779..c5638d096fa84a27b4ea397f62cd0d05a28e7c8c 100644 (file)
 
 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (plus:<VWIDE>
-          (mult:<VWIDE>
-              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-                 (match_operand:VQ_HSI 2 "register_operand" "w")
-                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-              (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
-                      (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
-          (match_operand:<VWIDE> 1 "register_operand" "0")))]
+       (plus:<VWIDE>
+         (mult:<VWIDE>
+           (ANY_EXTEND:<VWIDE>
+             (vec_select:<VHALF>
+               (match_operand:VQ_HSI 2 "register_operand" "w")
+               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
+         (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
 
 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (minus:<VWIDE>
-          (match_operand:<VWIDE> 1 "register_operand" "0")
-          (mult:<VWIDE>
-            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-              (match_operand:VQ_HSI 2 "register_operand" "w")
-              (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-            (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
-                   (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
+       (minus:<VWIDE>
+         (match_operand:<VWIDE> 1 "register_operand" "0")
+         (mult:<VWIDE>
+           (ANY_EXTEND:<VWIDE>
+             (vec_select:<VHALF>
+               (match_operand:VQ_HSI 2 "register_operand" "w")
+               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
   "TARGET_SIMD"
   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
 
 (define_insn "aarch64_<su>mlal_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (plus:<VWIDE>
-          (mult:<VWIDE>
-            (ANY_EXTEND:<VWIDE>
-              (match_operand:VD_HSI 2 "register_operand" "w"))
-            (ANY_EXTEND:<VWIDE>
-              (vec_duplicate:VD_HSI
-                     (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
-          (match_operand:<VWIDE> 1 "register_operand" "0")))]
+       (plus:<VWIDE>
+         (mult:<VWIDE>
+           (ANY_EXTEND:<VWIDE>
+             (match_operand:VD_HSI 2 "register_operand" "w"))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
+         (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
 
 (define_insn "aarch64_<su>mlsl_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (minus:<VWIDE>
-          (match_operand:<VWIDE> 1 "register_operand" "0")
-          (mult:<VWIDE>
-            (ANY_EXTEND:<VWIDE>
-              (match_operand:VD_HSI 2 "register_operand" "w"))
-            (ANY_EXTEND:<VWIDE>
-              (vec_duplicate:VD_HSI
-                     (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
+       (minus:<VWIDE>
+         (match_operand:<VWIDE> 1 "register_operand" "0")
+         (mult:<VWIDE>
+           (ANY_EXTEND:<VWIDE>
+             (match_operand:VD_HSI 2 "register_operand" "w"))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
   "TARGET_SIMD"
   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
        (mult:<VWIDE>
          (ANY_EXTEND:<VWIDE>
            (match_operand:<VCOND> 1 "register_operand" "w"))
-         (ANY_EXTEND:<VWIDE>
-           (vec_duplicate:<VCOND>
+         (vec_duplicate:<VWIDE>
+           (ANY_EXTEND:<VWIDE_S>
              (vec_select:<VEL>
                (match_operand:VDQHS 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE>
-         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-           (match_operand:VQ_HSI 1 "register_operand" "w")
-           (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
-         (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-           (vec_select:<VEL>
-             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
-             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
+         (ANY_EXTEND:<VWIDE>
+           (vec_select:<VHALF>
+             (match_operand:VQ_HSI 1 "register_operand" "w")
+             (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
+         (vec_duplicate:<VWIDE>
+           (ANY_EXTEND:<VWIDE_S>
+             (vec_select:<VEL>
+               (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+               (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
   "TARGET_SIMD"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE>
-         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-           (match_operand:VQ_HSI 1 "register_operand" "w")
-           (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
-         (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-           (vec_select:<VEL>
-             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
-             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
+         (ANY_EXTEND:<VWIDE>
+           (vec_select:<VHALF>
+             (match_operand:VQ_HSI 1 "register_operand" "w")
+             (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
+         (vec_duplicate:<VWIDE>
+           (ANY_EXTEND:<VWIDE_S>
+             (vec_select:<VEL>
+               (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
+               (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
   "TARGET_SIMD"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
 
 (define_insn "aarch64_<su>mull_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (mult:<VWIDE>
-          (ANY_EXTEND:<VWIDE>
-            (match_operand:VD_HSI 1 "register_operand" "w"))
-          (ANY_EXTEND:<VWIDE>
-            (vec_duplicate:<VCOND>
+       (mult:<VWIDE>
+         (ANY_EXTEND:<VWIDE>
+           (match_operand:VD_HSI 1 "register_operand" "w"))
+         (vec_duplicate:<VWIDE>
+           (ANY_EXTEND:<VWIDE_S>
              (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
   "TARGET_SIMD"
   "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE>
-         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-           (match_operand:VQ_HSI 1 "register_operand" "w")
-           (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
          (ANY_EXTEND:<VWIDE>
-           (vec_duplicate:<VCOND>
+           (vec_select:<VHALF>
+             (match_operand:VQ_HSI 1 "register_operand" "w")
+             (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+         (vec_duplicate:<VWIDE>
+           (ANY_EXTEND:<VWIDE_S>
              (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
   "TARGET_SIMD"
   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:<VCOND> 2 "register_operand" "w"))
-           (ANY_EXTEND:<VWIDE>
-             (vec_duplicate:<VCOND>
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:VDQHS 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
-           (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-             (match_operand:VQ_HSI 2 "register_operand" "w")
-             (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-           (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-             (vec_select:<VEL>
-               (match_operand:<VCOND> 4 "register_operand" "<vwx>")
-               (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
+           (ANY_EXTEND:<VWIDE>
+             (vec_select:<VHALF>
+               (match_operand:VQ_HSI 2 "register_operand" "w")
+               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (vec_select:<VEL>
+                 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
+                 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   {
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
-           (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-             (match_operand:VQ_HSI 2 "register_operand" "w")
-             (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-           (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-             (vec_select:<VEL>
-               (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
-               (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
+           (ANY_EXTEND:<VWIDE>
+             (vec_select:<VHALF>
+               (match_operand:VQ_HSI 2 "register_operand" "w")
+               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (vec_select:<VEL>
+                 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
+                 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   {
      (mult:<VWIDE>
        (ANY_EXTEND:<VWIDE>
         (match_operand:<VCOND> 2 "register_operand" "w"))
-       (ANY_EXTEND:<VWIDE>
-        (vec_duplicate:<VCOND>
+       (vec_duplicate:<VWIDE>
+        (ANY_EXTEND:<VWIDE_S>
           (vec_select:<VEL>
             (match_operand:VDQHS 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
-           (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-             (match_operand:VQ_HSI 2 "register_operand" "w")
-             (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-           (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-             (vec_select:<VEL>
-               (match_operand:<VCOND> 4 "register_operand" "<vwx>")
-               (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
+           (ANY_EXTEND:<VWIDE>
+             (vec_select:<VHALF>
+               (match_operand:VQ_HSI 2 "register_operand" "w")
+               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (vec_select:<VEL>
+                 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
+                 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
          )))]
   "TARGET_SIMD"
   {
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
-           (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-             (match_operand:VQ_HSI 2 "register_operand" "w")
-             (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-           (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-             (vec_select:<VEL>
-               (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
-               (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
+           (ANY_EXTEND:<VWIDE>
+             (vec_select:<VHALF>
+               (match_operand:VQ_HSI 2 "register_operand" "w")
+               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+           (vec_duplicate:<VWIDE>
+             (ANY_EXTEND:<VWIDE_S>
+               (vec_select:<VEL>
+                 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
+                 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
          )))]
   "TARGET_SIMD"
   {
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
-             (sign_extend:<VWIDE>
-               (vec_duplicate:VD_HSI
+             (vec_duplicate:<VWIDE>
+               (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+             ))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
-             (sign_extend:<VWIDE>
-               (vec_duplicate:VD_HSI
+             (vec_duplicate:<VWIDE>
+               (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+             ))
            (const_int 1))))]
   "TARGET_SIMD"
   {
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
-             (sign_extend:<VWIDE>
-               (vec_duplicate:VD_HSI
+             (vec_duplicate:<VWIDE>
+               (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+             ))
            (const_int 1))))]
   "TARGET_SIMD"
   {
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
-             (sign_extend:<VWIDE>
-               (vec_duplicate:VD_HSI
+             (vec_duplicate:<VWIDE>
+               (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+             ))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
              (mult:<VWIDE>
                (sign_extend:<VWIDE>
                      (match_operand:VD_HSI 2 "register_operand" "w"))
-               (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
+               (vec_duplicate:<VWIDE>
+                 (sign_extend:<VWIDE_S>
                    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
              (const_int 1))))]
   "TARGET_SIMD"
              (mult:<VWIDE>
                (sign_extend:<VWIDE>
                      (match_operand:VD_HSI 2 "register_operand" "w"))
-               (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
+               (vec_duplicate:<VWIDE>
+                 (sign_extend:<VWIDE_S>
                    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
              (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
          (ss_ashift:<VWIDE>
              (mult:<VWIDE>
                (sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-               (sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+                 (vec_select:<VHALF>
+                   (match_operand:VQ_HSI 2 "register_operand" "w")
+                   (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+               (vec_duplicate:<VWIDE>
+                 (sign_extend:<VWIDE_S>
                    (vec_select:<VEL>
                      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
 
 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_plus:<VWIDE>
+       (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
              (mult:<VWIDE>
                (sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-               (sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+                 (vec_select:<VHALF>
+                   (match_operand:VQ_HSI 2 "register_operand" "w")
+                   (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+               (vec_duplicate:<VWIDE>
+                 (sign_extend:<VWIDE_S>
                    (vec_select:<VEL>
                      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
 
 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_minus:<VWIDE>
+       (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
              (mult:<VWIDE>
                (sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-               (sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+                 (vec_select:<VHALF>
+                   (match_operand:VQ_HSI 2 "register_operand" "w")
+                   (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+               (vec_duplicate:<VWIDE>
+                 (sign_extend:<VWIDE_S>
                    (vec_select:<VEL>
                      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
 
 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_plus:<VWIDE>
+       (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
              (mult:<VWIDE>
                (sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-               (sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+                 (vec_select:<VHALF>
+                   (match_operand:VQ_HSI 2 "register_operand" "w")
+                   (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+               (vec_duplicate:<VWIDE>
+                 (sign_extend:<VWIDE_S>
                    (vec_select:<VEL>
                      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
 
 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_minus:<VWIDE>
+       (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
-                (vec_select:<VHALF>
-                  (match_operand:VQ_HSI 2 "register_operand" "w")
-                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-             (sign_extend:<VWIDE>
-                (vec_duplicate:<VHALF>
+               (vec_select:<VHALF>
+                 (match_operand:VQ_HSI 2 "register_operand" "w")
+                 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+             (vec_duplicate:<VWIDE>
+               (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
   "TARGET_SIMD"
 
 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_plus:<VWIDE>
+       (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
-                (vec_select:<VHALF>
-                  (match_operand:VQ_HSI 2 "register_operand" "w")
-                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-             (sign_extend:<VWIDE>
-                (vec_duplicate:<VHALF>
+               (vec_select:<VHALF>
+                 (match_operand:VQ_HSI 2 "register_operand" "w")
+                 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+             (vec_duplicate:<VWIDE>
+               (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
 
 (define_insn "aarch64_sqdmull_lane<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+       (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (match_operand:VD_HSI 1 "register_operand" "w"))
-              (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
-                   (vec_select:<VEL>
+              (vec_duplicate:<VWIDE>
+                (sign_extend:<VWIDE_S>
+                  (vec_select:<VEL>
                     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
               ))
 
 (define_insn "aarch64_sqdmull_laneq<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+       (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (match_operand:VD_HSI 1 "register_operand" "w"))
-              (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
-                   (vec_select:<VEL>
+              (vec_duplicate:<VWIDE>
+                (sign_extend:<VWIDE_S>
+                  (vec_select:<VEL>
                     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
               ))
 
 (define_insn "aarch64_sqdmull_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+       (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (match_operand:VD_HSI 1 "register_operand" "w"))
-              (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
-                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
+              (vec_duplicate:<VWIDE>
+                (sign_extend:<VWIDE_S>
+                  (match_operand:<VEL> 2 "register_operand" "<vwx>")))
               )
             (const_int 1)))]
   "TARGET_SIMD"
 
 ;; vqdmull2
 
-
-
 (define_insn "aarch64_sqdmull2<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (ss_ashift:<VWIDE>
 
 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+       (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
-                   (match_operand:VQ_HSI 1 "register_operand" "w")
-                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-              (sign_extend:<VWIDE>
-                 (vec_duplicate:<VHALF>
-                   (vec_select:<VEL>
+                  (match_operand:VQ_HSI 1 "register_operand" "w")
+                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+              (vec_duplicate:<VWIDE>
+                (sign_extend:<VWIDE_S>
+                  (vec_select:<VEL>
                     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
               ))
 
 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+       (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
-                   (match_operand:VQ_HSI 1 "register_operand" "w")
-                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-              (sign_extend:<VWIDE>
-                 (vec_duplicate:<VHALF>
-                   (vec_select:<VEL>
+                  (match_operand:VQ_HSI 1 "register_operand" "w")
+                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+              (vec_duplicate:<VWIDE>
+                (sign_extend:<VWIDE_S>
+                  (vec_select:<VEL>
                     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
               ))
 
 (define_insn "aarch64_sqdmull2_n<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+       (ss_ashift:<VWIDE>
             (mult:<VWIDE>
               (sign_extend:<VWIDE>
                 (vec_select:<VHALF>
-                   (match_operand:VQ_HSI 1 "register_operand" "w")
-                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-              (sign_extend:<VWIDE>
-                 (vec_duplicate:<VHALF>
-                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
+                  (match_operand:VQ_HSI 1 "register_operand" "w")
+                  (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+              (vec_duplicate:<VWIDE>
+                (sign_extend:<VWIDE_S>
+                  (match_operand:<VEL> 2 "register_operand" "<vwx>")))
               )
             (const_int 1)))]
   "TARGET_SIMD"
index fd306bfbba6d81d1c8cf2aec9191a10852ecfbb7..a719f57870fc5e1d363486e9c4c8df23aa006849 100644 (file)
@@ -1717,22 +1717,35 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
       && vec_duplicate_p (op, &elt)
       && code != VEC_DUPLICATE)
     {
-      /* Try applying the operator to ELT and see if that simplifies.
-        We can duplicate the result if so.
+      if (code == SIGN_EXTEND || code == ZERO_EXTEND)
+       /* Enforce a canonical order of VEC_DUPLICATE wrt other unary
+          operations by promoting VEC_DUPLICATE to the root of the expression
+          (as far as possible).  */
+       temp = simplify_gen_unary (code, GET_MODE_INNER (mode),
+                                  elt, GET_MODE_INNER (GET_MODE (op)));
+      else
+       /* Try applying the operator to ELT and see if that simplifies.
+          We can duplicate the result if so.
 
-        The reason we don't use simplify_gen_unary is that it isn't
-        necessarily a win to convert things like:
+          The reason we traditionally haven't used simplify_gen_unary
+          for these codes is that it didn't necessarily seem to be a
+          win to convert things like:
 
-          (neg:V (vec_duplicate:V (reg:S R)))
+            (neg:V (vec_duplicate:V (reg:S R)))
 
-        to:
+          to:
 
-          (vec_duplicate:V (neg:S (reg:S R)))
+            (vec_duplicate:V (neg:S (reg:S R)))
 
-        The first might be done entirely in vector registers while the
-        second might need a move between register files.  */
-      temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
-                                      elt, GET_MODE_INNER (GET_MODE (op)));
+          The first might be done entirely in vector registers while the
+          second might need a move between register files.
+
+          However, there are also cases where promoting the vec_duplicate is
+          more efficient, and there is definite value in having a canonical
+          form when matching instruction patterns.  We should consider
+          extending the simplify_gen_unary code above to more cases.  */
+       temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
+                                        elt, GET_MODE_INNER (GET_MODE (op)));
       if (temp)
        return gen_vec_duplicate (mode, temp);
     }