--- /dev/null
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+#define B 0
+#define G 1
+#define R 2
+
+int red = 153;
+int green = 66;
+int blue = 187;
+
+static void __attribute__((noipa))
+sub_left_prediction_bgr32(int *restrict dst, int *restrict src)
+{ /* Left prediction over 8 interleaved three-element groups: each dst
+     element receives the matching src element minus the value the same
+     channel had in the previous group.  The globals red, green and blue
+     supply the "previous" values for the first group and are left holding
+     the last group's values on return, so every iteration depends on the
+     one before it.  */
+ for (int i = 0; i < 8; i++) {
+ int rt = src[i * 3 + R]; /* Load the current group's three channels.  */
+ int gt = src[i * 3 + G];
+ int bt = src[i * 3 + B];
+
+ dst[i * 3 + R] = rt - red; /* Difference against the previous group.  */
+ dst[i * 3 + G] = gt - green;
+ dst[i * 3 + B] = bt - blue;
+
+ red = rt; /* Carry the current values into the next iteration.  */
+ green = gt;
+ blue = bt;
+ }
+}
+
+int main()
+{
+  /* Input: eight interleaved three-element groups.  */
+  int src[8*3] = { 160, 73, 194, 17, 33, 99, 0, 12, 283, 87, 73, 11,
+                   9, 7, 1, 23, 19, 13, 77, 233, 97, 78, 2, 5 };
+  /* Reference result: each element minus the same channel of the previous
+     group, with the first group measured against the initial values of
+     the globals blue, green and red.  */
+  int expected[8*3] = {-27, 7, 41, -143, -40, -95, -17, -21, 184, 87, 61,
+                       -272, -78, -66, -10, 14, 12, 12, 54, 214, 84, 1,
+                       -231, -92};
+  int out[8*3];
+
+  check_vect ();
+
+  sub_left_prediction_bgr32(out, src);
+
+  /* Compare element by element; the pragma asks GCC not to vectorize the
+     checking loop itself.  */
+#pragma GCC novector
+  for (int k = 0; k != 8*3; k++)
+    {
+      if (out[k] == expected[k])
+        continue;
+      __builtin_abort();
+    }
+
+  return 0;
+}
--- /dev/null
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "tree-vect.h"
+
+#define B 0
+#define G 1
+#define R 2
+#define A 3
+
+int red = 153;
+int green = 66;
+int blue = 187;
+int alpha = 255;
+
+static void __attribute__((noipa))
+sub_left_prediction_bgr32(uint8_t *restrict dst, uint8_t *restrict src, int w)
+{ /* Left prediction over 8 interleaved four-byte groups: each dst byte
+     receives the matching src byte minus the value the same channel had in
+     the previous group.  The globals red, green, blue and alpha supply the
+     "previous" values for the first group and are left holding the last
+     group's values on return.  The parameter w is not used by the body;
+     the trip count is fixed at 8.  */
+ for (int i = 0; i < 8; i++) {
+ int rt = src[i * 4 + R]; /* Load the current group's four channels.  */
+ int gt = src[i * 4 + G];
+ int bt = src[i * 4 + B];
+ int at = src[i * 4 + A];
+
+ dst[i * 4 + R] = rt - red; /* int difference stored into a uint8_t.  */
+ dst[i * 4 + G] = gt - green;
+ dst[i * 4 + B] = bt - blue;
+ dst[i * 4 + A] = at - alpha;
+
+ red = rt; /* Carry the current values into the next iteration.  */
+ green = gt;
+ blue = bt;
+ alpha = at;
+ }
+}
+
+int main()
+{
+  check_vect ();
+
+  /* The kernel reads and writes 8 * 4 = 32 bytes of each 36-byte buffer.
+     calloc zero-fills them, so only the first pixel of src is non-zero.  */
+  uint8_t *dst = calloc(36, sizeof(uint8_t));
+  uint8_t *src = calloc(36, sizeof(uint8_t));
+  /* Fail explicitly instead of dereferencing a null pointer.  */
+  if (!dst || !src)
+    __builtin_abort();
+
+  src[R] = 160;
+  src[G] = 73;
+  src[B] = 194;
+  src[A] = 255;
+
+  sub_left_prediction_bgr32(dst, src, 33);
+  /* First pixel minus the initial globals:
+     R 160 - 153, G 73 - 66, B 194 - 187, A 255 - 255.  */
+  if (dst[R] != 7 || dst[G] != 7 || dst[B] != 7 || dst[A] != 0)
+    __builtin_abort();
+
+  free(dst);
+  free(src);
+  return 0;
+}
return false;
}
+ /* We need to be able to build a { ..., a, b } init vector with
+ dist number of distinct trailing values. Always possible
+ when dist == 1 or when nunits is constant or when the initializations
+ are uniform. */
+ tree uniform_initval = NULL_TREE;
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+ for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node))
+ {
+ gphi *phi = as_a <gphi *> (s->stmt);
+ if (! uniform_initval)
+ uniform_initval = PHI_ARG_DEF_FROM_EDGE (phi, pe);
+ else if (! operand_equal_p (uniform_initval,
+ PHI_ARG_DEF_FROM_EDGE (phi, pe)))
+ {
+ uniform_initval = NULL_TREE;
+ break;
+ }
+ }
+ if (!uniform_initval && !nunits.is_constant ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot build initialization vector for "
+ "first order recurrence\n");
+ return false;
+ }
+
/* First-order recurrence autovectorization needs to handle permutation
with indices = [nunits-1, nunits, nunits+1, ...]. */
vec_perm_builder sel (nunits, 1, 3);
return true;
}
- edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
- basic_block bb = gimple_bb (phi);
- tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
- if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader)))
+ tree vec_init;
+ if (! uniform_initval)
{
- gimple_seq stmts = NULL;
- preheader = gimple_convert (&stmts, TREE_TYPE (vectype), preheader);
- gsi_insert_seq_on_edge_immediate (pe, stmts);
+ vec<constructor_elt, va_gc> *v = NULL;
+ vec_alloc (v, nunits.to_constant ());
+ for (unsigned i = 0; i < nunits.to_constant () - dist; ++i)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (TREE_TYPE (vectype)));
+ for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node))
+ {
+ gphi *phi = as_a <gphi *> (s->stmt);
+ tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (preheader)))
+ {
+ gimple_seq stmts = NULL;
+ preheader = gimple_convert (&stmts,
+ TREE_TYPE (vectype), preheader);
+ gsi_insert_seq_on_edge_immediate (pe, stmts);
+ }
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, preheader);
+ }
+ vec_init = build_constructor (vectype, v);
}
- tree vec_init = build_vector_from_val (vectype, preheader);
+ else
+ vec_init = uniform_initval;
vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL);
/* Create the vectorized first-order PHI node. */
tree vec_dest = vect_get_new_vect_var (vectype,
vect_simple_var, "vec_recur_");
+ basic_block bb = gimple_bb (phi);
gphi *new_phi = create_phi_node (vec_dest, bb);
add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION);