static const unsigned char lo_perm_qi_swap[16]
= {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
+ static const unsigned char hi_perm_qi_di[16]
+ = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+ static const unsigned char hi_perm_qi_si[16]
+ = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
+ static const unsigned char hi_perm_qi_hi[16]
+ = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23};
+
+ static const unsigned char lo_perm_qi_di[16]
+ = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+ static const unsigned char lo_perm_qi_si[16]
+ = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
+ static const unsigned char lo_perm_qi_hi[16]
+ = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31};
+
+ static const unsigned char hi_perm_hi_si[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+ static const unsigned char hi_perm_hi_di[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+
+ static const unsigned char lo_perm_hi_si[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+ static const unsigned char lo_perm_hi_di[8] = {4, 5, 6, 7, 12, 13, 14, 15};
+
+ static const unsigned char hi_perm_si_di[4] = {0, 1, 4, 5};
+
+ static const unsigned char lo_perm_si_di[4] = {2, 3, 6, 7};
+
bool merge_lo_p = false;
bool merge_hi_p = false;
bool swap_operands_p = false;
+ machine_mode mergemode = d.vmode;
if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
|| (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
merge_lo_p = true;
swap_operands_p = true;
}
+ else if (d.nelt == 16)
+ {
+ if (memcmp (d.perm, hi_perm_qi_di, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, hi_perm_qi_si, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, hi_perm_qi_hi, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V8HImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_di, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_si, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_hi, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V8HImode;
+ }
+ }
+ else if (d.nelt == 8)
+ {
+ if (memcmp (d.perm, hi_perm_hi_di, 8) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, hi_perm_hi_si, 8) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, lo_perm_hi_di, 8) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_hi_si, 8) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V4SImode;
+ }
+ }
+ else if (d.nelt == 4)
+ {
+ if (memcmp (d.perm, hi_perm_si_di, 4) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_si_di, 4) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ }
if (!merge_lo_p && !merge_hi_p)
return false;
if (d.testing_p)
return merge_lo_p || merge_hi_p;
- rtx op0, op1;
+ rtx op0, op1, target = d.target;
if (swap_operands_p)
{
op0 = d.op1;
op0 = d.op0;
op1 = d.op1;
}
+ if (mergemode != d.vmode)
+ {
+ target = simplify_gen_subreg (mergemode, target, d.vmode, 0);
+ op0 = simplify_gen_subreg (mergemode, op0, d.vmode, 0);
+ op1 = simplify_gen_subreg (mergemode, op1, d.vmode, 0);
+ }
+
+ s390_expand_merge (target, op0, op1, merge_hi_p);
+
+ return true;
+}
+
+/* Try to expand the vector permute operation described by D using the vector
+ pack instruction vpk. Return true if vector pack could be used.
+
+ D.perm is compared byte-for-byte against fixed selector tables. Each table
+ picks, from the concatenation of both operands, the second half of every
+ group of 2, 4 or 8 bytes (qi_ tables), of every 2 or 4 halfwords (hi_
+ tables) or of every 2 words (si_di). On a match the operands are viewed
+ in the wider PACKMODE, the target in RESMODE, and the matching
+ vec_pack_trunc pattern is emitted. When D.testing_p is set only the
+ feasibility is reported and no insn is emitted. */
+static bool
+expand_perm_with_pack (const struct expand_vec_perm_d &d)
+{
+ static const unsigned char qi_hi[16]
+ = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+ static const unsigned char qi_si[16]
+ = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
+ static const unsigned char qi_di[16]
+ = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
+
+ static const unsigned char hi_si[8]
+ = {1, 3, 5, 7, 9, 11, 13, 15};
+ static const unsigned char hi_di[8]
+ = {2, 3, 6, 7, 10, 11, 14, 15};
+
+ static const unsigned char si_di[4]
+ = {1, 3, 5, 7};
+
+ machine_mode packmode, resmode; /* Assigned only in branches that also set CODE. */
+ enum insn_code code = CODE_FOR_nothing; /* Stays unchanged if no table matches. */
+
+ if (d.nelt == 16 && memcmp (d.perm, qi_hi, 16) == 0)
+ {
+ packmode = E_V8HImode;
+ resmode = E_V16QImode;
+ code = CODE_FOR_vec_pack_trunc_v8hi;
+ }
+ else if ((d.nelt == 16 && memcmp (d.perm, qi_si, 16) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_si, 8) == 0))
+ {
+ packmode = E_V4SImode;
+ resmode = E_V8HImode;
+ code = CODE_FOR_vec_pack_trunc_v4si;
+ }
+ else if ((d.nelt == 16 && memcmp (d.perm, qi_di, 16) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_di, 8) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, si_di, 4) == 0))
+ {
+ packmode = E_V2DImode;
+ resmode = E_V4SImode;
+ code = CODE_FOR_vec_pack_trunc_v2di;
+ }
- s390_expand_merge (d.target, op0, op1, merge_hi_p);
+ if (code == CODE_FOR_nothing)
+ return false;
+ if (d.testing_p)
+ return true;
+ rtx target = simplify_gen_subreg (resmode, d.target, d.vmode, 0); /* Target viewed in RESMODE. */
+ rtx op0 = simplify_gen_subreg (packmode,
+ force_reg (GET_MODE (d.op0), d.op0),
+ d.vmode, 0); /* Operand is forced into a register before the subreg is taken. */
+ rtx op1 = simplify_gen_subreg (packmode,
+ force_reg (GET_MODE (d.op1), d.op1),
+ d.vmode, 0);
+ rtx pat = GEN_FCN (code) (target, op0, op1);
+ emit_insn (pat);
return true;
}
if (expand_perm_with_merge (d))
return true;
+ if (expand_perm_with_pack (d))
+ return true;
+
if (expand_perm_with_vpdi (d))
return true;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps -fno-stack-protector" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec-types.h"
+
+/*
+** qi_via_hi_hi:
+** vmrhh %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_hi_hi (v16qi a, v16qi b)
+{ /* Interleave bytes 0-7 of A and B in 2-byte groups; expected to become one vmrhh. */
+ return (v16qi){a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3],
+ a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]};
+}
+
+/*
+** qi_via_hi_lo:
+** vmrlh %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_hi_lo (v16qi a, v16qi b)
+{ /* Interleave bytes 8-15 of A and B in 2-byte groups; expected to become one vmrlh. */
+ return (v16qi){a[8], a[9], b[8], b[9], a[10], a[11], b[10], b[11],
+ a[12], a[13], b[12], b[13], a[14], a[15], b[14], b[15]};
+}
+
+/*
+** qi_via_si_hi:
+** vmrhf %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_si_hi (v16qi a, v16qi b)
+{ /* Interleave bytes 0-7 of A and B in 4-byte groups; expected to become one vmrhf. */
+ return (v16qi){a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3],
+ a[4], a[5], a[6], a[7], b[4], b[5], b[6], b[7]};
+}
+
+/*
+** qi_via_si_lo:
+** vmrlf %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_si_lo (v16qi a, v16qi b)
+{ /* Interleave bytes 8-15 of A and B in 4-byte groups; expected to become one vmrlf. */
+ return (v16qi){a[8], a[9], a[10], a[11], b[8], b[9], b[10], b[11],
+ a[12], a[13], a[14], a[15], b[12], b[13], b[14], b[15]};
+}
+
+/*
+** qi_via_di_hi:
+** vmrhg %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_di_hi (v16qi a, v16qi b)
+{ /* Bytes 0-7 of A followed by bytes 0-7 of B; expected to become one vmrhg. */
+ return (v16qi){a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
+ b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]};
+}
+
+/*
+** qi_via_di_lo:
+** vmrlg %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_di_lo (v16qi a, v16qi b)
+{ /* Bytes 8-15 of A followed by bytes 8-15 of B; expected to become one vmrlg. */
+ return (v16qi){a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15],
+ b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]};
+}
+
+/*
+** hi_via_si_hi:
+** vmrhf %v24,%v24,%v26
+** br %r14
+*/
+v8hi __attribute__((noinline,noipa))
+hi_via_si_hi (v8hi a, v8hi b)
+{ /* Interleave elements 0-3 of A and B in pairs; expected to become one vmrhf. */
+ return (v8hi){a[0], a[1], b[0], b[1], a[2], a[3], b[2], b[3]};
+}
+
+/*
+** hi_via_si_lo:
+** vmrlf %v24,%v24,%v26
+** br %r14
+*/
+v8hi __attribute__((noinline,noipa))
+hi_via_si_lo (v8hi a, v8hi b)
+{ /* Interleave elements 4-7 of A and B in pairs; expected to become one vmrlf. */
+ return (v8hi){a[4], a[5], b[4], b[5], a[6], a[7], b[6], b[7]};
+}
+
+/*
+** hi_via_di_hi:
+** vmrhg %v24,%v24,%v26
+** br %r14
+*/
+v8hi __attribute__((noinline,noipa))
+hi_via_di_hi (v8hi a, v8hi b)
+{ /* Elements 0-3 of A followed by elements 0-3 of B; expected to become one vmrhg. */
+ return (v8hi){a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]};
+}
+
+/*
+** hi_via_di_lo:
+** vmrlg %v24,%v24,%v26
+** br %r14
+*/
+v8hi __attribute__((noinline,noipa))
+hi_via_di_lo (v8hi a, v8hi b)
+{ /* Elements 4-7 of A followed by elements 4-7 of B; expected to become one vmrlg. */
+ return (v8hi){a[4], a[5], a[6], a[7], b[4], b[5], b[6], b[7]};
+}
+
+/*
+** si_via_di_hi:
+** vmrhg %v24,%v24,%v26
+** br %r14
+*/
+v4si __attribute__((noinline,noipa))
+si_via_di_hi (v4si a, v4si b)
+{ /* Elements 0-1 of A followed by elements 0-1 of B; expected to become one vmrhg. */
+ return (v4si){a[0], a[1], b[0], b[1]};
+}
+
+/*
+** si_via_di_lo:
+** vmrlg %v24,%v24,%v26
+** br %r14
+*/
+v4si __attribute__((noinline,noipa))
+si_via_di_lo (v4si a, v4si b)
+{ /* Elements 2-3 of A followed by elements 2-3 of B; expected to become one vmrlg. */
+ return (v4si){a[2], a[3], b[2], b[3]};
+}
+
+int
+main ()
+{ /* Run each merge permute on known inputs; abort on the first wrong element. */
+ static const signed char e_qi_via_hi_hi[16]
+ = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}; /* Each e_NAME array is the result expected from NAME. */
+ static const signed char e_qi_via_hi_lo[16]
+ = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31};
+ static const signed char e_qi_via_si_hi[16]
+ = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
+ static const signed char e_qi_via_si_lo[16]
+ = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
+ static const signed char e_qi_via_di_hi[16]
+ = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+ static const signed char e_qi_via_di_lo[16]
+ = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+
+ static const short e_hi_via_si_hi[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+ static const short e_hi_via_si_lo[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+ static const short e_hi_via_di_hi[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+ static const short e_hi_via_di_lo[8] = {4, 5, 6, 7, 12, 13, 14, 15};
+
+ static const int e_si_via_di_hi[4] = {0, 1, 4, 5};
+ static const int e_si_via_di_lo[4] = {2, 3, 6, 7};
+
+ v16qi a_qi = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; /* Inputs: A counts up from 0, B continues where A ends. */
+ v16qi b_qi = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+ v8hi a_hi = {0, 1, 2, 3, 4, 5, 6, 7};
+ v8hi b_hi = {8, 9, 10, 11, 12, 13, 14, 15};
+ v4si a_si = {0, 1, 2, 3};
+ v4si b_si = {4, 5, 6, 7};
+ v16qi r_qi;
+ v8hi r_hi;
+ v4si r_si;
+ int i;
+
+ r_qi = qi_via_hi_hi (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_hi_hi[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_hi_lo (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_hi_lo[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_si_hi (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_si_hi[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_si_lo (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_si_lo[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_di_hi (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_di_hi[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_di_lo (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_di_lo[i])
+ __builtin_abort ();
+
+ r_hi = hi_via_si_hi (a_hi, b_hi);
+ for (i = 0; i < 8; ++i)
+ if (r_hi[i] != e_hi_via_si_hi[i])
+ __builtin_abort ();
+
+ r_hi = hi_via_si_lo (a_hi, b_hi);
+ for (i = 0; i < 8; ++i)
+ if (r_hi[i] != e_hi_via_si_lo[i])
+ __builtin_abort ();
+
+ r_hi = hi_via_di_hi (a_hi, b_hi);
+ for (i = 0; i < 8; ++i)
+ if (r_hi[i] != e_hi_via_di_hi[i])
+ __builtin_abort ();
+
+ r_hi = hi_via_di_lo (a_hi, b_hi);
+ for (i = 0; i < 8; ++i)
+ if (r_hi[i] != e_hi_via_di_lo[i])
+ __builtin_abort ();
+
+ r_si = si_via_di_hi (a_si, b_si);
+ for (i = 0; i < 4; ++i)
+ if (r_si[i] != e_si_via_di_hi[i])
+ __builtin_abort ();
+
+ r_si = si_via_di_lo (a_si, b_si);
+ for (i = 0; i < 4; ++i)
+ if (r_si[i] != e_si_via_di_lo[i])
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps -fno-stack-protector" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "vec-types.h"
+
+/*
+** qi_via_hi:
+** vpkh %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_hi (v16qi a, v16qi b)
+{ /* Odd-indexed bytes of A followed by those of B; expected to become one vpkh. */
+ return (v16qi){a[1], a[3], a[5], a[7], a[9], a[11], a[13], a[15],
+ b[1], b[3], b[5], b[7], b[9], b[11], b[13], b[15]};
+}
+
+/*
+** qi_via_si:
+** vpkf %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_si (v16qi a, v16qi b)
+{ /* Last 2 bytes of every 4-byte group of A, then of B; expected to become one vpkf. */
+ return (v16qi){a[2], a[3], a[6], a[7], a[10], a[11], a[14], a[15],
+ b[2], b[3], b[6], b[7], b[10], b[11], b[14], b[15]};
+}
+
+/*
+** qi_via_di:
+** vpkg %v24,%v24,%v26
+** br %r14
+*/
+v16qi __attribute__((noinline,noipa))
+qi_via_di (v16qi a, v16qi b)
+{ /* Last 4 bytes of every 8-byte group of A, then of B; expected to become one vpkg. */
+ return (v16qi){a[4], a[5], a[6], a[7], a[12], a[13], a[14], a[15],
+ b[4], b[5], b[6], b[7], b[12], b[13], b[14], b[15]};
+}
+
+/*
+** hi_via_si:
+** vpkf %v24,%v24,%v26
+** br %r14
+*/
+v8hi __attribute__((noinline,noipa))
+hi_via_si (v8hi a, v8hi b)
+{ /* Odd-indexed elements of A followed by those of B; expected to become one vpkf. */
+ return (v8hi){a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7]};
+}
+
+/*
+** hi_via_di:
+** vpkg %v24,%v24,%v26
+** br %r14
+*/
+v8hi __attribute__((noinline,noipa))
+hi_via_di (v8hi a, v8hi b)
+{ /* Last 2 elements of every group of four in A, then in B; expected to become one vpkg. */
+ return (v8hi){a[2], a[3], a[6], a[7], b[2], b[3], b[6], b[7]};
+}
+
+/*
+** si_via_di:
+** vpkg %v24,%v24,%v26
+** br %r14
+*/
+v4si __attribute__((noinline,noipa))
+si_via_di (v4si a, v4si b)
+{ /* Odd-indexed elements of A followed by those of B; expected to become one vpkg. */
+ return (v4si){a[1], a[3], b[1], b[3]};
+}
+
+int
+main ()
+{ /* Run each pack permute on known inputs; abort on the first wrong element. */
+ static const signed char e_qi_via_hi[16]
+ = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; /* Each e_NAME array is the result expected from NAME. */
+ static const signed char e_qi_via_si[16]
+ = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
+ static const signed char e_qi_via_di[16]
+ = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
+
+ static const short e_hi_via_si[8] = {1, 3, 5, 7, 9, 11, 13, 15};
+ static const short e_hi_via_di[8] = {2, 3, 6, 7, 10, 11, 14, 15};
+
+ static const int e_si_via_di[4] = {1, 3, 5, 7};
+
+ v16qi a_qi = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; /* Inputs: A counts up from 0, B continues where A ends. */
+ v16qi b_qi = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+ v8hi a_hi = {0, 1, 2, 3, 4, 5, 6, 7};
+ v8hi b_hi = {8, 9, 10, 11, 12, 13, 14, 15};
+ v4si a_si = {0, 1, 2, 3};
+ v4si b_si = {4, 5, 6, 7};
+ v16qi r_qi;
+ v8hi r_hi;
+ v4si r_si;
+ int i;
+
+ r_qi = qi_via_hi (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_hi[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_si (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_si[i])
+ __builtin_abort ();
+
+ r_qi = qi_via_di (a_qi, b_qi);
+ for (i = 0; i < 16; ++i)
+ if (r_qi[i] != e_qi_via_di[i])
+ __builtin_abort ();
+
+ r_hi = hi_via_si (a_hi, b_hi);
+ for (i = 0; i < 8; ++i)
+ if (r_hi[i] != e_hi_via_si[i])
+ __builtin_abort ();
+
+ r_hi = hi_via_di (a_hi, b_hi);
+ for (i = 0; i < 8; ++i)
+ if (r_hi[i] != e_hi_via_di[i])
+ __builtin_abort ();
+
+ r_si = si_via_di (a_si, b_si);
+ for (i = 0; i < 4; ++i)
+ if (r_si[i] != e_si_via_di[i])
+ __builtin_abort ();
+ return 0;
+}