unsigned int vec_flags;
unsigned int op_vec_flags;
bool one_vector_p;
+ bool zero_op0_p, zero_op1_p;
bool testing_p;
};
/* to_constant is safe since this routine is specific to Advanced SIMD
vectors. */
unsigned int nelt = d->perm.length ().to_constant ();
+
+ /* If one register is the constant vector of 0 then we only need a one-reg
+ TBL: accesses to the vector of 0 are mapped to index -1, which is out of
+ range and so yields 0. We can't do this earlier since vec_perm_indices
+ clamps elements to within range, so we can only do it during codegen. */
+ if (d->zero_op0_p)
+ d->op0 = d->op1;
+ else if (d->zero_op1_p)
+ d->op1 = d->op0;
+
for (unsigned int i = 0; i < nelt; ++i)
- /* If big-endian and two vectors we end up with a weird mixed-endian
- mode on NEON. Reverse the index within each word but not the word
- itself. to_constant is safe because we checked is_constant above. */
- rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
- ? d->perm[i].to_constant () ^ (nelt - 1)
- : d->perm[i].to_constant ());
+ {
+ auto val = d->perm[i].to_constant ();
+
+ /* If we're selecting from a 0 vector, we can just use an out of range
+ index instead. */
+ if ((d->zero_op0_p && val < nelt) || (d->zero_op1_p && val >= nelt))
+ rperm[i] = constm1_rtx;
+ else
+ {
+ /* If we are remapping a zero register as the first parameter we need
+ to adjust the indices of the non-zero register. */
+ if (d->zero_op0_p)
+ val = val % nelt;
+
+ /* If big-endian and two vectors we end up with a weird mixed-endian
+ mode on NEON. Reverse the index within each word but not the word
+ itself. The earlier to_constant call that produced VAL is safe
+ because we checked is_constant above. */
+ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? val ^ (nelt - 1) : val);
+ }
+ }
sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
sel = force_reg (vmode, sel);
else
d.one_vector_p = false;
+ d.zero_op0_p = op0 == CONST0_RTX (op_mode);
+ d.zero_op1_p = op1 == CONST0_RTX (op_mode);
d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
sel.nelts_per_input ());
d.vmode = vmode;
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O1" } */
+
+typedef unsigned int v4si __attribute__ ((vector_size (16)));
+
+v4si f1 (v4si a)
+{
+ v4si zeros = {0,0,0,0};
+ return __builtin_shufflevector (a, zeros, 0, 5, 1, 6);
+}
+
+typedef unsigned short v8hi __attribute__ ((vector_size (16)));
+
+v8hi f2a (v8hi a)
+{
+ v8hi zeros = {0,0,0,0,0,0,0,0};
+ return __builtin_shufflevector (a, zeros, 0, 9, 1, 10, 2, 11, 3, 12);
+}
+
+v8hi f2b (v8hi a)
+{
+ v8hi zeros = {0,0,0,0,0,0,0,0};
+ return __builtin_shufflevector (a, zeros, 0, 5, 1, 6, 2, 7, 3, 8);
+}
+
+typedef unsigned char v16qi __attribute__ ((vector_size (16)));
+
+v16qi f3a (v16qi a)
+{
+ v16qi zeros = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ return __builtin_shufflevector (a, zeros, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7, 24);
+}
+
+v16qi f3b (v16qi a)
+{
+ v16qi zeros = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ return __builtin_shufflevector (a, zeros, 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12);
+}
+
+/* { dg-final { scan-assembler-times {tbl\tv[0-9]+.16b, \{v[0-9]+.16b\}, v[0-9]+.16b} 5 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target le } */
+/* { dg-additional-options "-O1" } */
+
+typedef unsigned int v4si __attribute__ ((vector_size (16)));
+
+v4si f1 (v4si a)
+{
+ v4si zeros = {0,0,0,0};
+ return __builtin_shufflevector (zeros, a, 0, 5, 1, 6);
+}
+
+v4si f2 (v4si a)
+{
+ v4si zeros = {0,0,0,0};
+ return __builtin_shufflevector (a, zeros, 0, 5, 1, 6);
+}
+
+/* { dg-final { scan-assembler-times {tbl\tv[0-9]+.16b, \{v[0-9]+.16b\}, v[0-9]+.16b} 2 } } */
+/* { dg-final { scan-assembler-times {(\.byte\s+-1\n\s+){4}(\.byte\s+[4-7]+\n\s+){4}(\.byte\s+-1\n\s+){4}(\.byte\s+(8|9|10|11)+\n?\s*){4}} 1 } } */