--- /dev/null
+/* PR tree-optimization/103126. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 3 "vect" } } */
+#include <stdint.h>
+
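+/* Each function below computes two bitwise reductions per outer iteration:
+   SUM1 via an explicit ternary that already supplies the operation's
+   neutral value, and SUM2 via a conditional update that if-conversion must
+   rewrite into the same COND_EXPR form before the inner loop can be
+   vectorized.  The vect dump is expected to report all three inner loops
+   as vectorized.  */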
+void xor_bit_arr_nolcd (uint64_t *__restrict mat, uint64_t *a, uint64_t *b,
+                        uint64_t *__restrict ans, int64_t n)
+{
+  int64_t i;
+  uint64_t vec1, sum1;
+  uint64_t vec2, sum2;
+
+  while (n > 0) {
+    sum1 = 0;
+    vec1 = a[n];
+    sum2 = 0;
+    vec2 = b[n];
+
+    for (i = 0; i < 64; i++) {
+      uint64_t tmp = mat[i];
+      uint64_t vec1_i = (vec1 >> i);
+      uint64_t vec2_i = (vec2 >> i);
+      sum1 ^= (vec1_i & 1) ? tmp : 0;
+      if (vec2_i & 1) sum2 ^= tmp;
+    }
+    *ans++ ^= sum1; n--;
+    *ans++ ^= sum2; n--;
+  }
+}
+
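+/* Same pattern with a conditional IOR reduction.  */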
+void ior_bit_arr_nolcd (uint64_t *__restrict mat, uint64_t *a, uint64_t *b,
+                        uint64_t *__restrict ans, int64_t n)
+{
+  int64_t i;
+  uint64_t vec1, sum1;
+  uint64_t vec2, sum2;
+
+  while (n > 0) {
+    sum1 = 0;
+    vec1 = a[n];
+    sum2 = 0;
+    vec2 = b[n];
+
+    for (i = 0; i < 64; i++) {
+      uint64_t tmp = mat[i];
+      uint64_t vec1_i = (vec1 >> i);
+      uint64_t vec2_i = (vec2 >> i);
+      sum1 |= (vec1_i & 1) ? tmp : 0;
+      if (vec2_i & 1) sum2 |= tmp;
+    }
+    *ans++ |= sum1; n--;
+    *ans++ |= sum2; n--;
+  }
+}
+
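+/* Same pattern with a conditional AND reduction; the accumulators start at
+   all-ones, the neutral value for '&', so the reduction is not folded to a
+   constant before vectorization.  */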
+void and_bit_arr_nolcd (uint64_t *__restrict mat, uint64_t *a, uint64_t *b,
+                        uint64_t *__restrict ans, int64_t n)
+{
+  int64_t i;
+  uint64_t vec1, sum1;
+  uint64_t vec2, sum2;
+
+  while (n > 0) {
+    sum1 = -1;
+    vec1 = a[n];
+    sum2 = -1;
+    vec2 = b[n];
+
+    for (i = 0; i < 64; i++) {
+      uint64_t tmp = mat[i];
+      uint64_t vec1_i = (vec1 >> i);
+      uint64_t vec2_i = (vec2 >> i);
+      sum1 &= (vec1_i & 1) ? tmp : -1;
+      if (vec2_i & 1) sum2 &= tmp;
+    }
+    *ans++ &= sum1; n--;
+    *ans++ &= sum2; n--;
+  }
+}
#include "tree-ssa-sccvn.h"
#include "tree-cfgcleanup.h"
#include "tree-ssa-dse.h"
+#include "tree-vectorizer.h"
/* Only handle PHIs with no more arguments unless we are asked to by
   simd pragma.  */
      reduction_op = gimple_assign_rhs_code (stmt);
    }
-  if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
+  if (reduction_op != PLUS_EXPR
+      && reduction_op != MINUS_EXPR
+      && reduction_op != BIT_IOR_EXPR
+      && reduction_op != BIT_XOR_EXPR
+      && reduction_op != BIT_AND_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);
  /* Make R_OP1 to hold reduction variable.  */
  if (r_nop2 == PHI_RESULT (header_phi)
-      && reduction_op == PLUS_EXPR)
+      && commutative_tree_code (reduction_op))
    {
      std::swap (r_op1, r_op2);
      std::swap (r_nop1, r_nop2);
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
-  tree zero = build_zero_cst (TREE_TYPE (rhs1));
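+  /* Value that leaves the reduction result unchanged: 0 for PLUS, MINUS,
+     IOR and XOR, all-ones for AND.  */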
+  enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
+  tree op_nochange
+    = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op, NULL);
  gimple_seq stmts = NULL;
  if (dump_file && (dump_flags & TDF_DETAILS))
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
                            unshare_expr (cond),
-                           swap ? zero : op1,
-                           swap ? op1 : zero);
+                           swap ? op_nochange : op1,
+                           swap ? op1 : op_nochange);
  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
-  /* Build rhs for unconditional increment/decrement.  */
-  rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc),
+  /* Build rhs for unconditional increment/decrement/logic operation.  */
+  rhs = gimple_build (&stmts, reduction_op,
                      TREE_TYPE (rhs1), op0, tmp);
  if (has_nop)