DONE;
})
+;; -------------------------------------------------------------------------
+;; ---- [INT] Mask reductions
+;; -------------------------------------------------------------------------
+
+(define_expand "reduc_sbool_and_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, AND);
+ DONE;
+})
+
+(define_expand "reduc_sbool_ior_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, IOR);
+ DONE;
+})
+
+(define_expand "reduc_sbool_xor_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, XOR);
+ DONE;
+})
+
;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
+void expand_mask_reduction (rtx *, rtx_code);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
emit_insn (gen_pred_extract_first (m1_mode, scalar_dest, m1_tmp2));
}
+/* Expand mask reductions. OPS are {dest, src} where DEST's mode
+ is QImode and SRC's mode is a mask mode.
+ CODE is one of AND, IOR, XOR. */
+
+void
+expand_mask_reduction (rtx *ops, rtx_code code)
+{
+ machine_mode mode = GET_MODE (ops[1]);
+ rtx dest = ops[0];
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ rtx tmp = gen_reg_rtx (Xmode);
+ rtx cpop_ops[] = {tmp, ops[1]};
+ emit_vlmax_insn (code_for_pred_popcount (mode, Xmode), CPOP_OP, cpop_ops);
+
+ bool eq_zero = false;
+
+ /* AND reduction is popcount (mask) == len,
+ IOR reduction is popcount (mask) != 0,
+ XOR reduction is popcount (mask) & 1 != 0. */
+ if (code == AND)
+ {
+ rtx len = gen_int_mode (GET_MODE_NUNITS (mode), HImode);
+ tmp = expand_binop (Xmode, sub_optab, tmp, len, NULL, true,
+ OPTAB_DIRECT);
+ eq_zero = true;
+ }
+ else if (code == IOR)
+ ;
+ else if (code == XOR)
+ tmp = expand_binop (Xmode, and_optab, tmp, GEN_INT (1), NULL, true,
+ OPTAB_DIRECT);
+ else
+ gcc_unreachable ();
+
+ rtx els = gen_label_rtx ();
+ rtx end = gen_label_rtx ();
+
+ riscv_expand_conditional_branch (els, eq_zero ? EQ : NE, tmp, const0_rtx);
+ emit_move_insn (dest, const0_rtx);
+ emit_jump_insn (gen_jump (end));
+ emit_barrier ();
+
+ emit_label (els);
+ emit_move_insn (dest, const1_rtx);
+ emit_label (end);
+}
+
/* Prepare ops for ternary operations.
It can be called before or after RA. */
void
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fand ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fand ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fand (n))
+ __builtin_abort ();
+
+ p[0] = 0;
+ for (int n = 1; n < 77; ++n)
+ if (fand (n))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fior (n))
+ __builtin_abort ();
+
+ p[0] = 1;
+ for (int n = 1; n < 77; ++n)
+ if (!fior (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fior ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+ bool r = true;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+ bool r = false;
+ for (int i = 0; i < n; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+int main()
+{
+ __builtin_memset (p, 1, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (fxort (n) != !(n & 1))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n) != (n & 1))
+ __builtin_abort ();
+
+ __builtin_memset (p, 0, sizeof(p));
+
+ for (int n = 0; n < 77; ++n)
+ if (!fxort (n))
+ __builtin_abort ();
+
+ for (int n = 0; n < 77; ++n)
+ if (fxorf (n))
+ __builtin_abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort ()
+{
+ bool r = true;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+bool __attribute__((noipa))
+fxorf ()
+{
+ bool r = false;
+ for (int i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {vcpop\.m\s+[atx][0-9]+,\s*v[0-9]+} 2 } } */