]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
IBM Z: Get rid of vec merge unspec
authorAndreas Krebbel <krebbel@linux.ibm.com>
Wed, 4 Aug 2021 16:40:09 +0000 (18:40 +0200)
committerAndreas Krebbel <krebbel@linux.ibm.com>
Wed, 4 Aug 2021 16:40:09 +0000 (18:40 +0200)
This patch gets rid of the unspecs we were using for the vector merge
instructions and replaces them with generic rtx.

gcc/ChangeLog:

* config/s390/s390-modes.def: Add more vector modes to support
concatenation of two vectors.
* config/s390/s390-protos.h (s390_expand_merge_perm_const): Add
prototype.
(s390_expand_merge): Likewise.
* config/s390/s390.c (s390_expand_merge_perm_const): New function.
(s390_expand_merge): New function.
* config/s390/s390.md (UNSPEC_VEC_MERGEH, UNSPEC_VEC_MERGEL):
Remove constant definitions.
* config/s390/vector.md (V_HW_2): Add mode iterators.
(VI_HW_4, V_HW_4): Rename VI_HW_4 to V_HW_4.
(vec_2x_nelts, vec_2x_wide): New mode attributes.
(*vmrhb, *vmrlb, *vmrhh, *vmrlh, *vmrhf, *vmrlf, *vmrhg, *vmrlg):
New pattern definitions.
(vec_widen_umult_lo_<mode>, vec_widen_umult_hi_<mode>)
(vec_widen_smult_lo_<mode>, vec_widen_smult_hi_<mode>)
(vec_unpacks_lo_v4sf, vec_unpacks_hi_v4sf, vec_unpacks_lo_v2df)
(vec_unpacks_hi_v2df): Adjust expanders to emit non-unspec RTX for
vec merge.
* config/s390/vx-builtins.md (V_HW_4): Remove mode iterator. Now
in vector.md.
(vec_mergeh<mode>, vec_mergel<mode>): Use s390_expand_merge to
emit vec merge pattern.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c:
Instead of vpdi with 0 and 5, vmrhg and vmrlg are used now.
* gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c: Likewise.
* gcc.target/s390/zvector/vec-types.h: New test.
* gcc.target/s390/zvector/vec_merge.c: New test.

gcc/config/s390/s390-modes.def
gcc/config/s390/s390-protos.h
gcc/config/s390/s390.c
gcc/config/s390/s390.md
gcc/config/s390/vector.md
gcc/config/s390/vx-builtins.md
gcc/testsuite/gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c
gcc/testsuite/gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c
gcc/testsuite/gcc.target/s390/zvector/vec-types.h [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec_merge.c [new file with mode: 0644]

index 6d814fc490c94508a6f26c61d2b0fe88f7d99333..245c2b811d49c0a30b8ec355294bc094babd46ae 100644 (file)
@@ -259,14 +259,17 @@ CC_MODE (CCVFANY);
 
 /* Vector modes.  */
 
-VECTOR_MODES (INT, 2);        /*                 V2QI */
-VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
-VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI */
-VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 2);        /*                       V2QI */
+VECTOR_MODES (INT, 4);        /*                  V4QI V2HI */
+VECTOR_MODES (INT, 8);        /*             V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16);       /*       V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 32);       /* V32QI V16HI V8SI V4DI V2TI */
 
 VECTOR_MODE (FLOAT, SF, 2);   /* V2SF */
 VECTOR_MODE (FLOAT, SF, 4);   /* V4SF */
+VECTOR_MODE (FLOAT, SF, 8);   /* V8SF */
 VECTOR_MODE (FLOAT, DF, 2);   /* V2DF */
+VECTOR_MODE (FLOAT, DF, 4);   /* V4DF */
 
 VECTOR_MODE (INT, QI, 1);     /* V1QI */
 VECTOR_MODE (INT, HI, 1);     /* V1HI */
index 289e018cf0fdb0bf35c93cdbaafbe946de5387b2..4b03c6e99f5e5b3558849f9d444a6daf328562ce 100644 (file)
@@ -122,6 +122,8 @@ extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
 extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
 extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_init (rtx, rtx);
+extern rtx s390_expand_merge_perm_const (machine_mode, bool);
+extern void s390_expand_merge (rtx, rtx, rtx, bool);
 extern rtx s390_build_signbit_mask (machine_mode);
 extern rtx s390_return_addr_rtx (int, rtx);
 extern rtx s390_back_chain_rtx (void);
index 8c7d36675f5270e91278c92ec6987661e6dc1725..3f4521ebf8e12a6b20532680a01b02723f3b9b33 100644 (file)
@@ -7014,6 +7014,42 @@ s390_expand_vec_init (rtx target, rtx vals)
     }
 }
 
+/* Return a PARALLEL of CONST_INTs to be used as the selector of a
+   vec_select implementing a vector merge operation in MODE.  The
+   indices refer to the concatenation of the two source vectors:
+   0 .. NELTS-1 address the first source and NELTS .. 2*NELTS-1 the
+   second.  If HIGH_P is true the left-most elements of the source
+   vectors are merged, otherwise the right-most elements.  E.g. for a
+   mode with 4 elements the result is {0, 4, 1, 5} if HIGH_P is true
+   and {2, 6, 3, 7} otherwise.  */
+rtx
+s390_expand_merge_perm_const (machine_mode mode, bool high_p)
+{
+  int nelts = GET_MODE_NUNITS (mode);
+  rtx perm[16];
+  int addend = high_p ? 0 : nelts;
+
+  /* PERM has room for 16 selectors, which covers V16QI, the mode with
+     the most elements among the merge patterns.  Fail loudly for a
+     mode with more elements instead of writing past the end of the
+     array.  */
+  gcc_assert (nelts <= (int) (sizeof (perm) / sizeof (perm[0])));
+
+  /* Even result slots take element (I + ADDEND) / 2 of the first
+     source, odd slots the same element of the second source.  */
+  for (int i = 0; i < nelts; i++)
+    perm[i] = GEN_INT ((i + addend) / 2 + (i % 2) * nelts);
+
+  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelts, perm));
+}
+
+/* Expand a vector merge of SRC1 and SRC2 into TARGET.  Both sources
+   are forced into registers and concatenated into a vector with
+   twice as many elements; the merged elements are then picked from
+   that concatenation with a vec_select.  HIGH_P chooses between
+   merge high (true) and merge low (false).  */
+void
+s390_expand_merge (rtx target, rtx src1, rtx src2, bool high_p)
+{
+  machine_mode mode = GET_MODE (target);
+
+  /* Mode of the intermediate concatenation: same element mode, twice
+     the number of elements.  */
+  opt_machine_mode concat_mode_opt
+    = mode_for_vector (GET_MODE_INNER (mode), 2 * GET_MODE_NUNITS (mode));
+  gcc_assert (concat_mode_opt.exists ());
+  machine_mode concat_mode = concat_mode_opt.require ();
+
+  rtx selector = s390_expand_merge_perm_const (mode, high_p);
+  rtx op1 = force_reg (GET_MODE (src1), src1);
+  rtx op2 = force_reg (GET_MODE (src2), src2);
+
+  rtx concat = gen_rtx_VEC_CONCAT (concat_mode, op1, op2);
+  rtx merged = gen_rtx_VEC_SELECT (mode, concat, selector);
+  emit_insn (gen_rtx_SET (target, merged));
+}
+
 /* Emit a vector constant that contains 1s in each element's sign bit position
    and 0s in other positions.  MODE is the desired constant's mode.  */
 extern rtx
index 8ad21b0f4f70080b54a47cfd4663bd4d632bdcbf..d896faee0fb3cfd3d8b19547e24c9eddbaf89380 100644 (file)
    UNSPEC_VEC_LOAD_BNDRY
    UNSPEC_VEC_LOAD_LEN
    UNSPEC_VEC_LOAD_LEN_R
-   UNSPEC_VEC_MERGEH
-   UNSPEC_VEC_MERGEL
    UNSPEC_VEC_PACK
    UNSPEC_VEC_PACK_SATURATE
    UNSPEC_VEC_PACK_SATURATE_CC
index ab605b3d2cf3e679f22ff92c488434880c24f244..51c63328896779c197be99c8a3fd2d7803f233d6 100644 (file)
 (define_mode_iterator VI_HW_HSD [V8HI  V4SI V2DI])
 (define_mode_iterator VI_HW_HS  [V8HI  V4SI])
 (define_mode_iterator VI_HW_QH  [V16QI V8HI])
-(define_mode_iterator VI_HW_4   [V4SI V4SF])
+
+; Directly supported vector modes with a certain number of elements
+(define_mode_iterator V_HW_2   [V2DI V2DF])
+(define_mode_iterator V_HW_4   [V4SI V4SF])
 
 ; All integer vector modes supported in a vector register + TImode
 (define_mode_iterator VIT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1TI TI])
                       (DF "d") (V1DF "d") (V2DF "d")
                       (TF "x") (V1TF "x")])
 
-; Vector with doubled element size.
+; Vector with widened element size but half the number of elements.
 (define_mode_attr vec_double [(V1QI "V1HI") (V2QI "V1HI") (V4QI "V2HI") (V8QI "V4HI") (V16QI "V8HI")
                              (V1HI "V1SI") (V2HI "V1SI") (V4HI "V2SI") (V8HI "V4SI")
                              (V1SI "V1DI") (V2SI "V1DI") (V4SI "V2DI")
                              (V1DI "V1TI") (V2DI "V1TI")
                              (V1SF "V1DF") (V2SF "V1DF") (V4SF "V2DF")])
 
-; Vector with half the element size.
+; Vector with halved element size but twice the number of elements.
 (define_mode_attr vec_half [(V1HI "V2QI") (V2HI "V4QI") (V4HI "V8QI") (V8HI "V16QI")
                            (V1SI "V2HI") (V2SI "V4HI") (V4SI "V8HI")
                            (V1DI "V2SI") (V2DI "V4SI")
                            (V1DF "V2SF") (V2DF "V4SF")
                            (V1TF "V1DF")])
 
+; Vector with twice the number of elements but same element size.
+(define_mode_attr vec_2x_nelts [(V1QI "V2QI") (V2QI "V4QI") (V4QI "V8QI") (V8QI "V16QI") (V16QI "V32QI")
+                               (V1HI "V2HI") (V2HI "V4HI") (V4HI "V8HI") (V8HI "V16HI")
+                               (V1SI "V2SI") (V2SI "V4SI") (V4SI "V8SI")
+                               (V1DI "V2DI") (V2DI "V4DI")
+                               (V1SF "V2SF") (V2SF "V4SF") (V4SF "V8SF")
+                               (V1DF "V2DF") (V2DF "V4DF")])
+
+; Vector with widened element size and the same number of elements.
+(define_mode_attr vec_2x_wide [(V1QI "V1HI") (V2QI "V2HI") (V4QI "V4HI") (V8QI "V8HI") (V16QI "V16HI")
+                              (V1HI "V1SI") (V2HI "V2SI") (V4HI "V4SI") (V8HI "V8SI")
+                              (V1SI "V1DI") (V2SI "V2DI") (V4SI "V4DI")
+                              (V1DI "V1TI") (V2DI "V2TI")
+                              (V1SF "V1DF") (V2SF "V2DF") (V4SF "V4DF")
+                              (V1DF "V1TF") (V2DF "V2TF")])
+
 ; Vector with half the element size AND half the number of elements.
 (define_mode_attr vec_halfhalf
   [(V2HI "V2QI") (V4HI "V4QI") (V8HI "V8QI")
 })
 
 (define_insn "*vec_vllezlf<mode>"
-  [(set (match_operand:VI_HW_4              0 "register_operand" "=v")
-       (vec_concat:VI_HW_4
+  [(set (match_operand:V_HW_4              0 "register_operand" "=v")
+       (vec_concat:V_HW_4
         (vec_concat:<vec_halfnumelts>
          (match_operand:<non_vec> 1 "memory_operand"    "R")
          (const_int 0))
   "vperm\t%v0,%v1,%v2,%v3"
   [(set_attr "op_type" "VRR")])
 
+; vmrhb: interleave the 8 left-most byte elements of operands 1 and 2.
+; Selector indices 0-15 address operand 1 and 16-31 address operand 2
+; within the V32QI concatenation.
+(define_insn "*vmrhb"
+  [(set (match_operand:V16QI                     0 "register_operand" "=v")
+        (vec_select:V16QI
+         (vec_concat:V32QI (match_operand:V16QI 1 "register_operand"  "v")
+                           (match_operand:V16QI 2 "register_operand"  "v"))
+         (parallel [(const_int 0) (const_int 16)
+                    (const_int 1) (const_int 17)
+                    (const_int 2) (const_int 18)
+                    (const_int 3) (const_int 19)
+                    (const_int 4) (const_int 20)
+                    (const_int 5) (const_int 21)
+                    (const_int 6) (const_int 22)
+                    (const_int 7) (const_int 23)])))]
+  "TARGET_VX"
+  "vmrhb\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrlb: interleave the 8 right-most byte elements of operands 1 and 2.
+; Selector indices 0-15 address operand 1 and 16-31 address operand 2
+; within the V32QI concatenation.
+(define_insn "*vmrlb"
+  [(set (match_operand:V16QI                     0 "register_operand" "=v")
+        (vec_select:V16QI
+         (vec_concat:V32QI (match_operand:V16QI 1 "register_operand"  "v")
+                           (match_operand:V16QI 2 "register_operand"  "v"))
+         (parallel [(const_int  8) (const_int 24)
+                    (const_int  9) (const_int 25)
+                    (const_int 10) (const_int 26)
+                    (const_int 11) (const_int 27)
+                    (const_int 12) (const_int 28)
+                    (const_int 13) (const_int 29)
+                    (const_int 14) (const_int 30)
+                    (const_int 15) (const_int 31)])))]
+  "TARGET_VX"
+  "vmrlb\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrhh: interleave the 4 left-most halfword elements of operands 1 and 2.
+; Selector indices 0-7 address operand 1 and 8-15 address operand 2
+; within the V16HI concatenation.
+(define_insn "*vmrhh"
+  [(set (match_operand:V8HI                     0 "register_operand" "=v")
+        (vec_select:V8HI
+         (vec_concat:V16HI (match_operand:V8HI 1 "register_operand"  "v")
+                           (match_operand:V8HI 2 "register_operand"  "v"))
+         (parallel [(const_int 0) (const_int 8)
+                    (const_int 1) (const_int 9)
+                    (const_int 2) (const_int 10)
+                    (const_int 3) (const_int 11)])))]
+  "TARGET_VX"
+  "vmrhh\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrlh: interleave the 4 right-most halfword elements of operands 1 and 2.
+; Selector indices 0-7 address operand 1 and 8-15 address operand 2
+; within the V16HI concatenation.
+(define_insn "*vmrlh"
+  [(set (match_operand:V8HI                     0 "register_operand" "=v")
+        (vec_select:V8HI
+         (vec_concat:V16HI (match_operand:V8HI 1 "register_operand"  "v")
+                           (match_operand:V8HI 2 "register_operand"  "v"))
+         (parallel [(const_int 4) (const_int 12)
+                    (const_int 5) (const_int 13)
+                    (const_int 6) (const_int 14)
+                    (const_int 7) (const_int 15)])))]
+  "TARGET_VX"
+  "vmrlh\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrhf: interleave the 2 left-most elements of operands 1 and 2 for the
+; four-element modes in V_HW_4.  Selector indices 0-3 address operand 1
+; and 4-7 address operand 2 within the concatenation.
+(define_insn "*vmrhf"
+  [(set (match_operand:V_HW_4                              0 "register_operand" "=v")
+        (vec_select:V_HW_4
+         (vec_concat:<vec_2x_nelts> (match_operand:V_HW_4 1 "register_operand"  "v")
+                                    (match_operand:V_HW_4 2 "register_operand"  "v"))
+         (parallel [(const_int 0) (const_int 4)
+                    (const_int 1) (const_int 5)])))]
+  "TARGET_VX"
+  "vmrhf\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrlf: interleave the 2 right-most elements of operands 1 and 2 for the
+; four-element modes in V_HW_4.  Selector indices 0-3 address operand 1
+; and 4-7 address operand 2 within the concatenation.
+(define_insn "*vmrlf"
+  [(set (match_operand:V_HW_4                              0 "register_operand" "=v")
+        (vec_select:V_HW_4
+         (vec_concat:<vec_2x_nelts> (match_operand:V_HW_4 1 "register_operand"  "v")
+                                    (match_operand:V_HW_4 2 "register_operand"  "v"))
+         (parallel [(const_int 2) (const_int 6)
+                    (const_int 3) (const_int 7)])))]
+  "TARGET_VX"
+  "vmrlf\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrhg: combine the left-most element of operand 1 with the left-most
+; element of operand 2 for the two-element modes in V_HW_2.  Selector
+; indices 0-1 address operand 1 and 2-3 address operand 2.
+(define_insn "*vmrhg"
+  [(set (match_operand:V_HW_2                              0 "register_operand" "=v")
+        (vec_select:V_HW_2
+         (vec_concat:<vec_2x_nelts> (match_operand:V_HW_2 1 "register_operand"  "v")
+                                    (match_operand:V_HW_2 2 "register_operand"  "v"))
+         (parallel [(const_int 0) (const_int 2)])))]
+  "TARGET_VX"
+  "vmrhg\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vmrlg: combine the right-most element of operand 1 with the right-most
+; element of operand 2 for the two-element modes in V_HW_2.  Selector
+; indices 0-1 address operand 1 and 2-3 address operand 2.
+(define_insn "*vmrlg"
+  [(set (match_operand:V_HW_2                              0 "register_operand" "=v")
+        (vec_select:V_HW_2
+         (vec_concat:<vec_2x_nelts> (match_operand:V_HW_2 1 "register_operand"  "v")
+                                    (match_operand:V_HW_2 2 "register_operand"  "v"))
+         (parallel [(const_int 1) (const_int 3)])))]
+  "TARGET_VX"
+  "vmrlg\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+
 (define_insn "*tf_to_fprx2_0"
   [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 0)
        (subreg:DF (match_operand:TF    1 "general_operand"       "v") 0))]
        (unspec:<vec_double> [(match_dup 1) (match_dup 2)]
                             UNSPEC_VEC_UMULT_ODD))
    (set (match_operand:<vec_double>                 0 "register_operand" "")
-       (unspec:<vec_double> [(match_dup 3) (match_dup 4)]
-                            UNSPEC_VEC_MERGEL))]
+        (vec_select:<vec_double>
+        (vec_concat:<vec_2x_wide> (match_dup 3) (match_dup 4))
+        (match_dup 5)))]
   "TARGET_VX"
  {
    operands[3] = gen_reg_rtx (<vec_double>mode);
    operands[4] = gen_reg_rtx (<vec_double>mode);
+   operands[5] = s390_expand_merge_perm_const (<vec_double>mode, false);
  })
 
 (define_expand "vec_widen_umult_hi_<mode>"
        (unspec:<vec_double> [(match_dup 1) (match_dup 2)]
                             UNSPEC_VEC_UMULT_ODD))
    (set (match_operand:<vec_double>                 0 "register_operand" "")
-       (unspec:<vec_double> [(match_dup 3) (match_dup 4)]
-                            UNSPEC_VEC_MERGEH))]
+        (vec_select:<vec_double>
+        (vec_concat:<vec_2x_wide> (match_dup 3) (match_dup 4))
+        (match_dup 5)))]
   "TARGET_VX"
  {
    operands[3] = gen_reg_rtx (<vec_double>mode);
    operands[4] = gen_reg_rtx (<vec_double>mode);
+   operands[5] = s390_expand_merge_perm_const (<vec_double>mode, true);
  })
 
 (define_expand "vec_widen_smult_lo_<mode>"
        (unspec:<vec_double> [(match_dup 1) (match_dup 2)]
                             UNSPEC_VEC_SMULT_ODD))
    (set (match_operand:<vec_double>                 0 "register_operand" "")
-       (unspec:<vec_double> [(match_dup 3) (match_dup 4)]
-                            UNSPEC_VEC_MERGEL))]
+        (vec_select:<vec_double>
+        (vec_concat:<vec_2x_wide> (match_dup 3) (match_dup 4))
+        (match_dup 5)))]
   "TARGET_VX"
  {
    operands[3] = gen_reg_rtx (<vec_double>mode);
    operands[4] = gen_reg_rtx (<vec_double>mode);
+   operands[5] = s390_expand_merge_perm_const (<vec_double>mode, false);
  })
 
 (define_expand "vec_widen_smult_hi_<mode>"
        (unspec:<vec_double> [(match_dup 1) (match_dup 2)]
                             UNSPEC_VEC_SMULT_ODD))
    (set (match_operand:<vec_double>                 0 "register_operand" "")
-       (unspec:<vec_double> [(match_dup 3) (match_dup 4)]
-                            UNSPEC_VEC_MERGEH))]
+        (vec_select:<vec_double>
+        (vec_concat:<vec_2x_wide> (match_dup 3) (match_dup 4))
+        (match_dup 5)))]
   "TARGET_VX"
  {
    operands[3] = gen_reg_rtx (<vec_double>mode);
    operands[4] = gen_reg_rtx (<vec_double>mode);
+   operands[5] = s390_expand_merge_perm_const (<vec_double>mode, true);
  })
 
 ; vec_widen_ushiftl_hi
 
 (define_expand "vec_unpacks_lo_v4sf"
   [(set (match_dup 2)
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
-                     (match_dup 1)]
-                    UNSPEC_VEC_MERGEL))
-   (set (match_operand:V2DF               0 "register_operand" "=v")
+        (vec_select:V4SF
+        (vec_concat:V8SF (match_operand:V4SF 1 "register_operand" "") (match_dup 1))
+        (match_dup 3)))
+   (set (match_operand:V2DF                   0 "register_operand" "")
        (float_extend:V2DF
         (vec_select:V2SF
          (match_dup 2)
          (parallel [(const_int 0) (const_int 2)]))))]
   "TARGET_VX"
-{ operands[2] = gen_reg_rtx(V4SFmode); })
+{
+  operands[2] = gen_reg_rtx(V4SFmode);
+  operands[3] = s390_expand_merge_perm_const (V4SFmode, false);
+})
 
 (define_expand "vec_unpacks_hi_v4sf"
   [(set (match_dup 2)
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
-                     (match_dup 1)]
-                    UNSPEC_VEC_MERGEH))
-   (set (match_operand:V2DF               0 "register_operand" "=v")
+        (vec_select:V4SF
+        (vec_concat:V8SF (match_operand:V4SF 1 "register_operand" "") (match_dup 1))
+        (match_dup 3)))
+   (set (match_operand:V2DF                   0 "register_operand" "")
        (float_extend:V2DF
         (vec_select:V2SF
          (match_dup 2)
          (parallel [(const_int 0) (const_int 2)]))))]
   "TARGET_VX"
-{ operands[2] = gen_reg_rtx(V4SFmode); })
+{
+  operands[2] = gen_reg_rtx(V4SFmode);
+  operands[3] = s390_expand_merge_perm_const (V4SFmode, true);
+})
 
 
 ; double -> long double
 
 (define_expand "vec_unpacks_lo_v2df"
   [(set (match_dup 2)
-       (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "v")
-                     (match_dup 1)]
-                    UNSPEC_VEC_MERGEL))
-   (set (match_operand:V1TF               0 "register_operand" "=v")
+        (vec_select:V2DF
+        (vec_concat:V4DF (match_operand:V2DF 1 "register_operand" "") (match_dup 1))
+        (match_dup 3)))
+   (set (match_operand:V1TF                   0 "register_operand" "")
        (float_extend:V1TF
         (vec_select:V1DF
          (match_dup 2)
          (parallel [(const_int 0)]))))]
   "TARGET_VXE"
-{ operands[2] = gen_reg_rtx (V2DFmode); })
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = s390_expand_merge_perm_const (V2DFmode, false);
+})
 
 (define_expand "vec_unpacks_hi_v2df"
   [(set (match_dup 2)
-       (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "v")
-                     (match_dup 1)]
-                    UNSPEC_VEC_MERGEH))
-   (set (match_operand:V1TF               0 "register_operand" "=v")
+        (vec_select:V2DF
+        (vec_concat:V4DF (match_operand:V2DF 1 "register_operand" "") (match_dup 1))
+        (match_dup 3)))
+   (set (match_operand:V1TF                   0 "register_operand" "")
        (float_extend:V1TF
         (vec_select:V1DF
          (match_dup 2)
          (parallel [(const_int 0)]))))]
   "TARGET_VXE"
-{ operands[2] = gen_reg_rtx (V2DFmode); })
+{
+  operands[2] = gen_reg_rtx (V2DFmode);
+  operands[3] = s390_expand_merge_perm_const (V2DFmode, true);
+})
 
 
 ; 2 x v2df -> 1 x v4sf
index 3df501b562ca1e2bfea5bc8017e9c2ce531ac655..5abe43b9e536a20fccf7a1bcf0dcfe15a10b7b8a 100644 (file)
@@ -22,7 +22,7 @@
 
 (define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF (V4SF "TARGET_VXE")])
 (define_mode_iterator VI_HW_SD [V4SI V2DI])
-(define_mode_iterator V_HW_4 [V4SI V4SF])
+
 ; Full size vector modes with more than one element which are directly supported in vector registers by the hardware.
 (define_mode_iterator VEC_HW  [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")])
 (define_mode_iterator VECF_HW [(V4SF "TARGET_VXE") V2DF])
   [(set_attr "op_type" "VRS,VRX,VSI")])
 
 
-; FIXME: The following two patterns might using vec_merge. But what is
-; the canonical form: (vec_select (vec_merge op0 op1)) or (vec_merge
-; (vec_select op0) (vec_select op1)
 ; vmrhb, vmrhh, vmrhf, vmrhg
-(define_insn "vec_mergeh<mode>"
-  [(set (match_operand:V_128_NOSINGLE                         0 "register_operand" "=v")
-       (unspec:V_128_NOSINGLE [(match_operand:V_128_NOSINGLE 1 "register_operand"  "v")
-                       (match_operand:V_128_NOSINGLE         2 "register_operand"  "v")]
-                      UNSPEC_VEC_MERGEH))]
+(define_expand "vec_mergeh<mode>"
+  [(match_operand:V_128_NOSINGLE 0 "register_operand" "")
+   (match_operand:V_128_NOSINGLE 1 "register_operand" "")
+   (match_operand:V_128_NOSINGLE 2 "register_operand" "")]
   "TARGET_VX"
-  "vmrh<bhfgq>\t%v0,%1,%2"
-  [(set_attr "op_type" "VRR")])
+{
+  s390_expand_merge (operands[0], operands[1], operands[2], true);
+  DONE;
+})
 
 ; vmrlb, vmrlh, vmrlf, vmrlg
-(define_insn "vec_mergel<mode>"
-  [(set (match_operand:V_128_NOSINGLE                         0 "register_operand" "=v")
-       (unspec:V_128_NOSINGLE [(match_operand:V_128_NOSINGLE 1 "register_operand"  "v")
-                       (match_operand:V_128_NOSINGLE         2 "register_operand"  "v")]
-                    UNSPEC_VEC_MERGEL))]
+(define_expand "vec_mergel<mode>"
+  [(match_operand:V_128_NOSINGLE 0 "register_operand" "")
+   (match_operand:V_128_NOSINGLE 1 "register_operand" "")
+   (match_operand:V_128_NOSINGLE 2 "register_operand" "")]
   "TARGET_VX"
-  "vmrl<bhfgq>\t%v0,%1,%2"
-  [(set_attr "op_type" "VRR")])
+{
+  s390_expand_merge (operands[0], operands[1], operands[2], false);
+  DONE;
+})
 
 
 ; Vector pack
index 2dcaf08f00b1b538f263617f2ae7bf2c41adeb5a..a89dd460c699a72f00dd4c0ecc59c7f3b8ffdf2c 100644 (file)
@@ -16,13 +16,13 @@ sqxbr (long double x)
   return out;
 }
 
-/* Ideally `vpdi %v3,%v1,%v3,5` should be optimized away, but the compiler
+/* Ideally `vmrlg %v3,%v1,%v3` should be optimized away, but the compiler
  * can't do it, because the UNSPEC pattern operates on the whole register.
  * Using the SUBREG pattern solves this problem, but it's fragile.
  */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v2,%v0,%v2,5\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v1,%v1,%v3,0\n} 2 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v3,%v1,%v3,5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\n\tvmrlg\t%v2,%v0,%v2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\n\tvmrhg\t%v1,%v1,%v3\n} 2 } } */
+/* { dg-final { scan-assembler-times {\n\tvmrlg\t%v3,%v1,%v3\n} 1 } } */
 
 int
 main (void)
index 6c5f88d8652d2c743e6253b794eb2ffd17c3d86c..dd894c8136bb9afeda0059dc29d07d8752b6dac2 100644 (file)
@@ -15,12 +15,12 @@ sqxbr (long double x)
   return inout;
 }
 
-/* Ideally there should be just one `vpdi %v6,%v4,%v6,5`, but the compiler
+/* Ideally there should be just one `vmrlg %v6,%v4,%v6`, but the compiler
  * can't optimize it away, because the UNSPEC pattern operates on the whole
  * register.  Using the SUBREG pattern solves this problem, but it's fragile.
  */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v6,%v4,%v6,5\n} 2 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v4,%v4,%v6,0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\n\tvmrlg\t%v6,%v4,%v6\n} 2 } } */
+/* { dg-final { scan-assembler-times {\n\tvmrhg\t%v4,%v4,%v6\n} 2 } } */
 
 int
 main (void)
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-types.h b/gcc/testsuite/gcc.target/s390/zvector/vec-types.h
new file mode 100644 (file)
index 0000000..35bd2a5
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef VEC_TYPES_H
+#define VEC_TYPES_H 1
+
+#include <vecintrin.h>
+
+typedef __vector signed char v16qi;
+typedef __vector unsigned char uv16qi;
+
+typedef __vector signed short v8hi;
+typedef __vector unsigned short uv8hi;
+
+typedef __vector signed int v4si;
+typedef __vector unsigned int uv4si;
+
+typedef __vector signed long long v2di;
+typedef __vector unsigned long long uv2di;
+
+#if __SIZEOF_INT128__ == 16
+typedef __vector __int128_t v1ti;
+#endif
+
+typedef __vector double v2df;
+typedef __vector long double v1tf;
+
+#if __ARCH__ >= 12
+typedef __vector float v4sf;
+#endif
+
+/* Evaluate to a VEC_TYPE value whose element I holds I + ADDEND,
+   converted to the element type.  This is a GNU statement expression:
+   the values are written to a local, 8-byte aligned array of the
+   element type which is then read back as one VEC_TYPE object.  */
+#define GEN_SEQ_VEC(VEC_TYPE, ADDEND)                                  \
+  ({ VEC_TYPE dummy;                                                   \
+    const int elts = sizeof(VEC_TYPE) / sizeof(dummy[0]);              \
+    typeof(dummy[0]) __attribute__((aligned(8))) ar[elts];             \
+    for (int i = 0; i < elts; i++)                                     \
+      ar[i] = (typeof(dummy[0]))(i + (ADDEND));                                \
+    *(VEC_TYPE*)ar;})
+
+#endif
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec_merge.c b/gcc/testsuite/gcc.target/s390/zvector/vec_merge.c
new file mode 100644 (file)
index 0000000..348d1f6
--- /dev/null
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+
+/* { dg-final { scan-assembler-times "\tvmrhb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrhh\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlh\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrhf\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlf\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */
+
+#include "vec-types.h"
+#include <vecintrin.h>
+
+/* Define a noinline function merge_<HILO>_<VEC_TYPE> (e.g.
+   merge_l_v16qi) that returns vec_merge<HILO> (a, b) for two
+   VEC_TYPE arguments.  HILO is either l or h.  */
+#define GEN_MERGE(VEC_TYPE, HILO)                                      \
+  VEC_TYPE __attribute__((noinline))                                   \
+  merge_##HILO##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) {                  \
+    return vec_merge##HILO (a, b); }
+
+GEN_MERGE(v16qi, l)
+GEN_MERGE(v16qi, h)
+GEN_MERGE(uv16qi, l)
+GEN_MERGE(uv16qi, h)
+
+GEN_MERGE(v8hi, l)
+GEN_MERGE(v8hi, h)
+GEN_MERGE(uv8hi, l)
+GEN_MERGE(uv8hi, h)
+
+GEN_MERGE(v4si, l)
+GEN_MERGE(v4si, h)
+GEN_MERGE(uv4si, l)
+GEN_MERGE(uv4si, h)
+
+GEN_MERGE(v4sf, l)
+GEN_MERGE(v4sf, h)
+
+GEN_MERGE(v2di, l)
+GEN_MERGE(v2di, h)
+GEN_MERGE(uv2di, l)
+GEN_MERGE(uv2di, h)
+
+GEN_MERGE(v2df, l)
+GEN_MERGE(v2df, h)
+
+
+/* Call merge_l_<VEC_TYPE> on SRC1 and SRC2 and abort unless element I
+   of the result equals (I + ELTS) / 2 + (I % 2) * ELTS.  With
+   SRC1 = {0 .. ELTS-1} and SRC2 = {ELTS .. 2*ELTS-1}, as CHECK_MERGE
+   passes them, that is the interleaving of the second halves of both
+   sources, e.g. {2, 6, 3, 7} for 4 elements.  */
+#define CHECK_MERGE_LO(VEC_TYPE, SRC1, SRC2)                           \
+  {                                                                    \
+    VEC_TYPE v = merge_l_##VEC_TYPE ((SRC1), (SRC2));                  \
+    int elts = sizeof(v) / sizeof(v[0]);                               \
+    for (int i = 0; i < elts; i++)                                     \
+      if (v[i] != (i + elts) / 2 + (i % 2) * elts)                     \
+       __builtin_abort();                                              \
+  }
+
+/* Call merge_h_<VEC_TYPE> on SRC1 and SRC2 and abort unless element I
+   of the result equals I / 2 + (I % 2) * ELTS.  With
+   SRC1 = {0 .. ELTS-1} and SRC2 = {ELTS .. 2*ELTS-1}, as CHECK_MERGE
+   passes them, that is the interleaving of the first halves of both
+   sources, e.g. {0, 4, 1, 5} for 4 elements.  */
+#define CHECK_MERGE_HI(VEC_TYPE, SRC1, SRC2)                           \
+  {                                                                    \
+    VEC_TYPE v = merge_h_##VEC_TYPE ((SRC1), (SRC2));                  \
+    int elts = sizeof(v) / sizeof(v[0]);                               \
+    for (int i = 0; i < elts; i++)                                     \
+      if (v[i] != i / 2 + (i % 2) * elts)                              \
+       __builtin_abort();                                              \
+  }
+
+/* Build two VEC_TYPE inputs, A = {0 .. N-1} and B = {N .. 2*N-1} where
+   N is the number of elements in VEC_TYPE, and run both the merge low
+   and the merge high check on them.  */
+#define CHECK_MERGE(VEC_TYPE)                                          \
+  {                                                                    \
+    VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0);                            \
+    VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, sizeof(VEC_TYPE) / sizeof(a[0])); \
+    CHECK_MERGE_LO (VEC_TYPE, a, b);                                   \
+    CHECK_MERGE_HI (VEC_TYPE, a, b);                                   \
+  }
+
+/* Run the merge low and merge high checks for every vector type a
+   merge function pair was generated for with GEN_MERGE.  Any
+   mismatch ends the program via __builtin_abort.  */
+int
+main ()
+{
+  CHECK_MERGE(v16qi);
+  CHECK_MERGE(uv16qi);
+  CHECK_MERGE(v8hi);
+  CHECK_MERGE(uv8hi);
+  CHECK_MERGE(v4si);
+  CHECK_MERGE(uv4si);
+  CHECK_MERGE(v4sf);
+  CHECK_MERGE(v2di);
+  CHECK_MERGE(uv2di);
+  CHECK_MERGE(v2df);
+}