MIPS: Support vector reduc for MSA

author YunQiang Su <syq@gcc.gnu.org>

Mon, 26 Aug 2024 00:45:36 +0000 (08:45 +0800)

committer YunQiang Su <yunqiang@isrc.iscas.ac.cn>

Tue, 3 Sep 2024 00:13:15 +0000 (08:13 +0800)
author YunQiang Su <syq@gcc.gnu.org>
Mon, 26 Aug 2024 00:45:36 +0000 (08:45 +0800)
committer YunQiang Su <yunqiang@isrc.iscas.ac.cn>
Tue, 3 Sep 2024 00:13:15 +0000 (08:13 +0800)
diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md

index 377c63f0d357a458ce93153b1eb5a00092cedd75..976f296402ee190fa5fa9b28a22226e2916b36bf 100644 (file)
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -125,6 +125,9 @@
  ;; Only floating-point modes.
  (define_mode_iterator FMSA     [V2DF V4SF])
  
+;; Only used for reduc_plus_scal: V4SI, V8HI and V16QI use HADD instead.
+(define_mode_iterator MSA_NO_HADD [V2DF V4SF V2DI])
+
  ;; The attribute gives the integer vector mode with same size.
  (define_mode_attr VIMODE
    [(V2DF "V2DI")
@@ -2802,3 +2805,128 @@
    (set_attr "mode" "TI")
    (set_attr "compact_form" "never")
    (set_attr "branch_likely" "no")])
+
+
+;; Vector reduction operation
+;; Reduce MSA vector operand 1 with smin and store the scalar result in
+;; operand 0.
+(define_expand "reduc_smin_scal_<mode>"
+  [(match_operand:<UNITMODE> 0 "register_operand")
+   (match_operand:MSA 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  /* Do the reduction on whole vectors, then read element 0 of the
+     reduced vector as the scalar result.  */
+  rtx reduced = gen_reg_rtx (<MODE>mode);
+
+  mips_expand_msa_reduc (gen_smin<mode>3, reduced, operands[1]);
+  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], reduced,
+                                              const0_rtx));
+  DONE;
+})
+
+;; Reduce MSA vector operand 1 with smax and store the scalar result in
+;; operand 0.
+(define_expand "reduc_smax_scal_<mode>"
+  [(match_operand:<UNITMODE> 0 "register_operand")
+   (match_operand:MSA 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  /* Do the reduction on whole vectors, then read element 0 of the
+     reduced vector as the scalar result.  */
+  rtx reduced = gen_reg_rtx (<MODE>mode);
+
+  mips_expand_msa_reduc (gen_smax<mode>3, reduced, operands[1]);
+  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], reduced,
+                                              const0_rtx));
+  DONE;
+})
+
+;; Reduce integer MSA vector operand 1 with umin and store the scalar
+;; result in operand 0.
+(define_expand "reduc_umin_scal_<mode>"
+  [(match_operand:<UNITMODE> 0 "register_operand")
+   (match_operand:IMSA 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  /* Do the reduction on whole vectors, then read element 0 of the
+     reduced vector as the scalar result.  */
+  rtx reduced = gen_reg_rtx (<MODE>mode);
+
+  mips_expand_msa_reduc (gen_umin<mode>3, reduced, operands[1]);
+  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], reduced,
+                                              const0_rtx));
+  DONE;
+})
+
+;; Reduce integer MSA vector operand 1 with umax and store the scalar
+;; result in operand 0.
+(define_expand "reduc_umax_scal_<mode>"
+  [(match_operand:<UNITMODE> 0 "register_operand")
+   (match_operand:IMSA 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  /* Do the reduction on whole vectors, then read element 0 of the
+     reduced vector as the scalar result.  */
+  rtx reduced = gen_reg_rtx (<MODE>mode);
+
+  mips_expand_msa_reduc (gen_umax<mode>3, reduced, operands[1]);
+  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], reduced,
+                                              const0_rtx));
+  DONE;
+})
+
+;; Sum the elements of operand 1 into scalar operand 0 for the modes in
+;; MSA_NO_HADD.  V4SI, V8HI and V16QI have their own HADD-based expanders.
+(define_expand "reduc_plus_scal_<mode>"
+  [(match_operand:<UNITMODE> 0 "register_operand")
+   (match_operand:MSA_NO_HADD 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  /* Do the reduction on whole vectors, then read element 0 of the
+     reduced vector as the scalar result.  */
+  rtx reduced = gen_reg_rtx (<MODE>mode);
+
+  mips_expand_msa_reduc (gen_add<mode>3, reduced, operands[1]);
+  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], reduced,
+                                              const0_rtx));
+  DONE;
+})
+
+;; Sum the four SImode elements of operand 1 into scalar operand 0.
+(define_expand "reduc_plus_scal_v4si"
+  [(match_operand:SI 0 "register_operand")
+   (match_operand:V4SI 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  rtx word2 = gen_reg_rtx (SImode);
+  rtx pair_sums = gen_reg_rtx (V2DImode);
+
+  /* One HADD of the input with itself yields a V2DI vector of pairwise
+     sums.  */
+  emit_insn (gen_msa_hadd_s_d (pair_sums, operands[1], operands[1]));
+
+  /* View the sums as V4SI, read words 0 and 2 and finish with a scalar
+     add.  NOTE(review): words 0 and 2 are presumably the low halves of
+     the two doubleword sums -- confirm for big-endian.  */
+  emit_insn (gen_vec_extractv4sisi (operands[0],
+                                    gen_lowpart (V4SImode, pair_sums),
+                                    const0_rtx));
+  emit_insn (gen_vec_extractv4sisi (word2,
+                                    gen_lowpart (V4SImode, pair_sums),
+                                    GEN_INT (2)));
+  emit_insn (gen_addsi3 (operands[0], operands[0], word2));
+  DONE;
+})
+
+;; Sum the eight HImode elements of operand 1 into scalar operand 0.
+(define_expand "reduc_plus_scal_v8hi"
+  [(match_operand:HI 0 "register_operand")
+   (match_operand:V8HI 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  rtx word_sums = gen_reg_rtx (V4SImode);
+  rtx dword_sums = gen_reg_rtx (V2DImode);
+  rtx total = gen_reg_rtx (V2DImode);
+
+  /* Two HADD steps take V8HI to V4SI and then to V2DI.  */
+  emit_insn (gen_msa_hadd_s_w (word_sums, operands[1], operands[1]));
+  emit_insn (gen_msa_hadd_s_d (dword_sums, word_sums, word_sums));
+
+  /* Combine the two remaining doublewords with the generic reduction,
+     then read halfword 0 of the result viewed as V8HI.  */
+  mips_expand_msa_reduc (gen_addv2di3, total, dword_sums);
+  emit_insn (gen_vec_extractv8hihi (operands[0],
+                                    gen_lowpart (V8HImode, total),
+                                    const0_rtx));
+  DONE;
+})
+
+;; Sum the sixteen QImode elements of operand 1 into scalar operand 0.
+(define_expand "reduc_plus_scal_v16qi"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:V16QI 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  rtx half_sums = gen_reg_rtx (V8HImode);
+  rtx word_sums = gen_reg_rtx (V4SImode);
+  rtx dword_sums = gen_reg_rtx (V2DImode);
+  rtx total = gen_reg_rtx (V2DImode);
+
+  /* Three HADD steps take V16QI to V8HI, V4SI and finally V2DI.  */
+  emit_insn (gen_msa_hadd_s_h (half_sums, operands[1], operands[1]));
+  emit_insn (gen_msa_hadd_s_w (word_sums, half_sums, half_sums));
+  emit_insn (gen_msa_hadd_s_d (dword_sums, word_sums, word_sums));
+
+  /* Combine the two remaining doublewords with the generic reduction,
+     then read byte 0 of the result viewed as V16QI.  */
+  mips_expand_msa_reduc (gen_addv2di3, total, dword_sums);
+  emit_insn (gen_vec_extractv16qiqi (operands[0],
+                                     gen_lowpart (V16QImode, total),
+                                     const0_rtx));
+  DONE;
+})
+
+;; Bitwise reductions of integer MSA vector operand 1 into scalar
+;; operand 0, one expander per code in any_bitwise.  The any_bitwise
+;; wrapper in the template is what makes the code iterator instantiate
+;; this pattern; the preparation statements end in DONE.
+(define_expand "reduc_<optab>_scal_<mode>"
+  [(any_bitwise:<UNITMODE>
+      (match_operand:<UNITMODE> 0 "register_operand")
+      (match_operand:IMSA 1 "register_operand"))]
+  "ISA_HAS_MSA"
+{
+  /* Do the reduction on whole vectors, then read element 0 of the
+     reduced vector as the scalar result.  */
+  rtx reduced = gen_reg_rtx (<MODE>mode);
+
+  mips_expand_msa_reduc (gen_<optab><mode>3, reduced, operands[1]);
+  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], reduced,
+                                              const0_rtx));
+  DONE;
+})
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h

index 90b4c87fdea15618ba8efec575fd084547281f33..96e084e6e6411d99e0d1fa5942544081e568e351 100644 (file)
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -352,6 +352,7 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
  extern void mips_expand_vector_init (rtx, rtx);
  extern void mips_expand_vec_unpack (rtx op[2], bool, bool);
  extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
+extern void mips_expand_msa_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
  extern void mips_expand_vec_minmax (rtx, rtx, rtx,
                                     rtx (*) (rtx, rtx, rtx), bool);
  
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc

index 6c797b6216435597ae9aa45b106dac209e6afbff..173f792bf55a3bedb6ec0895aa5c1ebcb4e85780 100644 (file)
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -22239,6 +22239,47 @@ mips_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
    return ok;
  }
  
+/* Expand a vector reduction.  FN is the binary pattern to reduce;
+   DEST is the destination; IN is the input vector.  */
+
+void
+mips_expand_msa_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+{
+  rtx swap, vec = in;
+  machine_mode mode = GET_MODE (in);
+  unsigned int i, gelt;
+  const unsigned nelt = GET_MODE_BITSIZE (mode) / GET_MODE_UNIT_BITSIZE (mode);
+  unsigned char perm[MAX_VECT_LEN];
+
+  /* We have no SHF.d.  */
+  if (nelt == 2)
+    {
+      perm[0] = 2;
+      perm[1] = 3;
+      perm[2] = 0;
+      perm[3] = 1;
+      rtx rsi = simplify_gen_subreg (V4SImode, in, mode, 0);
+      swap = gen_reg_rtx (V4SImode);
+      mips_expand_vselect (swap, rsi, perm, 4);
+      emit_move_insn (dest, gen_rtx_SUBREG (mode, swap, 0));
+      emit_insn (fn (dest, dest, vec));
+      return;
+    }
+
+  for (gelt=1; gelt<=nelt/2; gelt *= 2)
+    {
+      for (i = 0; i<nelt; i++)
+       perm[i] = ((i/gelt)%2) ? (i-gelt) : (i+gelt);
+      if (gelt == nelt/2)
+       swap = dest;
+      else
+       swap = gen_reg_rtx (mode);
+      mips_expand_vselect (swap, vec, perm, nelt);
+      emit_insn (fn (swap, swap, vec));
+      vec = swap;
+    }
+}
+
  /* Implement TARGET_SCHED_REASSOCIATION_WIDTH.  */
  
  static int
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md

index 737d2566ec809320377699e0c81d27c652abf1f2..f147667d63a8cd3657245d86c434f1a6f6d6ebdd 100644 (file)
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -993,6 +993,10 @@
  ;; from the same template.
  (define_code_iterator any_shift [ashift ashiftrt lshiftrt])
  
+;; This code iterator allows the three bitwise instructions to be generated
+;; from the same template.
+(define_code_iterator any_bitwise [and ior xor])
+
  ;; This code iterator allows unsigned and signed division to be generated
  ;; from the same template.
  (define_code_iterator any_div [div udiv])
diff --git a/gcc/testsuite/gcc.target/mips/msa-reduc.c b/gcc/testsuite/gcc.target/mips/msa-reduc.c

new file mode 100644 (file)

index 0000000..27cc28e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-reduc.c
@@ -0,0 +1,119 @@
+/* Test reduc support for MIPS MSA ASE with shf.FMT */
+/* { dg-do compile } */
+/* { dg-skip-if "auto-vectorization test" { *-*-* } { "-O0" "-O1" "-O2" "-Os"} { "-O3" } } */
+/* { dg-options "-mfp64 -mhard-float -mmsa -ffast-math" } */
+
+/* { dg-final { scan-assembler-times "min_int8_t:.*shf\\.b.*0xb1.*min_s\\.b.*shf\\.h.*0xb1.*min_s\\.b.*shf\\.w.*0xb1.*min_s\\.b.*shf\\.w.*0x4e.*min_s\\.b.*copy_s\\.b.*min_int8_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_int8_t:.*shf\\.b.*0xb1.*max_s\\.b.*shf\\.h.*0xb1.*max_s\\.b.*shf\\.w.*0xb1.*max_s\\.b.*shf\\.w.*0x4e.*max_s\\.b.*copy_s\\.b.*max_int8_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_uint8_t:.*shf\\.b.*0xb1.*min_u\\.b.*shf\\.h.*0xb1.*min_u\\.b.*shf\\.w.*0xb1.*min_u\\.b.*shf\\.w.*0x4e.*min_u\\.b.*copy_.\\.b.*min_uint8_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_uint8_t:.*shf\\.b.*0xb1.*max_u\\.b.*shf\\.h.*0xb1.*max_u\\.b.*shf\\.w.*0xb1.*max_u\\.b.*shf\\.w.*0x4e.*max_u\\.b.*copy_.\\.b.*max_uint8_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_int16_t:.*shf\\.h.*0xb1.*min_s\\.h.*shf\\.w.*0xb1.*min_s\\.h.*shf\\.w.*0x4e.*min_s\\.h.*copy_s\\.h.*min_int16_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_int16_t:.*shf\\.h.*0xb1.*max_s\\.h.*shf\\.w.*0xb1.*max_s\\.h.*shf\\.w.*0x4e.*max_s\\.h.*copy_s\\.h.*max_int16_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_uint16_t:.*shf\\.h.*0xb1.*min_u\\.h.*shf\\.w.*0xb1.*min_u\\.h.*shf\\.w.*0x4e.*min_u\\.h.*copy_.\\.h.*min_uint16_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_uint16_t:.*shf\\.h.*0xb1.*max_u\\.h.*shf\\.w.*0xb1.*max_u\\.h.*shf\\.w.*0x4e.*max_u\\.h.*copy_.\\.h.*max_uint16_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_int32_t:.*shf\\.w.*0xb1.*min_s\\.w.*shf\\.w.*0x4e.*min_s\\.w.*copy_s\\.w.*min_int32_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_int32_t:.*shf\\.w.*0xb1.*max_s\\.w.*shf\\.w.*0x4e.*max_s\\.w.*copy_s\\.w.*max_int32_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_uint32_t:.*shf\\.w.*0xb1.*min_u\\.w.*shf\\.w.*0x4e.*min_u\\.w.*copy_.\\.w.*min_uint32_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_uint32_t:.*shf\\.w.*0xb1.*max_u\\.w.*shf\\.w.*0x4e.*max_u\\.w.*copy_.\\.w.*max_uint32_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_int64_t:.*shf\\.w.*0x4e.*min_s\\.d.*copy_s\\.?.*min_int64_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_int64_t:.*shf\\.w.*0x4e.*max_s\\.d.*copy_s\\.?.*max_int64_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_uint64_t:.*shf\\.w.*0x4e.*min_u\\.d.*copy_.\\.?.*min_uint64_t" 1 } } */
+/* { dg-final { scan-assembler-times "max_uint64_t:.*shf\\.w.*0x4e.*max_u\\.d.*copy_.\\.?.*max_uint64_t" 1 } } */
+/* { dg-final { scan-assembler-times "min_float:.*shf\\.w.*0xb1.*fmin\\.w.*shf\\.w.*0x4e.*fmin\\.w.*min_float" 1 } } */
+/* { dg-final { scan-assembler-times "max_float:.*shf\\.w.*0xb1.*fmax\\.w.*shf\\.w.*0x4e.*fmax\\.w.*max_float" 1 } } */
+/* { dg-final { scan-assembler-times "min_double:.*shf\\.w.*0x4e.*fmin\\.d.*min_double" 1 } } */
+/* { dg-final { scan-assembler-times "max_double:.*shf\\.w.*0x4e.*fmax\\.d.*max_double" 1 } } */
+
+/* { dg-final { scan-assembler-times "plus_int8_t:.*hadd_s\\.h.*hadd_s\\.w.*hadd_s\\.d.*shf\\.w.*0x4e.*addv\\.d.*copy_s\\.b.*plus_int8_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_uint8_t:.*hadd_s\\.h.*hadd_s\\.w.*hadd_s\\.d.*shf\\.w.*0x4e.*addv\\.d.*copy_.\\.b.*plus_uint8_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_int16_t:.*hadd_s\\.w.*hadd_s\\.d.*shf\\.w.*0x4e.*addv\\.d.*copy_s\\.h.*plus_int16_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_uint16_t:.*hadd_s\\.w.*hadd_s\\.d.*shf\\.w.*0x4e.*addv\\.d.*copy_.\\.h.*plus_uint16_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_int32_t:.*hadd_s\\.d.*copy_s\\.w.*copy_s\\.w.*addu.*plus_int32_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_uint32_t:.*hadd_s\\.d.*copy_s\\.w.*copy_s\\.w.*addu.*plus_uint32_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_int64_t:.*shf\\.w.*0x4e.*addv\\.d.*copy_s\\.?.*plus_int64_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_uint64_t:.*shf\\.w.*0x4e.*addv\\.d.*copy_.\\.?.*plus_uint64_t" 1 } } */
+/* { dg-final { scan-assembler-times "plus_float:.*shf\\.w.*0xb1.*fadd\\.w.*shf\\.w.*0x4e.*fadd\\.w.*plus_float" 1 } } */
+/* { dg-final { scan-assembler-times "plus_double:.*shf\\.w.*0x4e.*fadd\\.d.*plus_double" 1 } } */
+
+/* { dg-final { scan-assembler-times "or_int8_t:.*shf\\.b.*0xb1.*or\\.v.*shf\\.h.*0xb1.*or\\.v.*shf\\.w.*0xb1.*or\\.v.*shf\\.w.*0x4e.*or\\.v.*copy_s\\.b.*or_int8_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_int8_t:.*shf\\.b.*0xb1.*xor\\.v.*shf\\.h.*0xb1.*xor\\.v.*shf\\.w.*0xb1.*xor\\.v.*shf\\.w.*0x4e.*xor\\.v.*copy_s\\.b.*xor_int8_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_int8_t:.*shf\\.b.*0xb1.*and\\.v.*shf\\.h.*0xb1.*and\\.v.*shf\\.w.*0xb1.*and\\.v.*shf\\.w.*0x4e.*and\\.v.*copy_s\\.b.*and_int8_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_uint8_t:.*shf\\.b.*0xb1.*or\\.v.*shf\\.h.*0xb1.*or\\.v.*shf\\.w.*0xb1.*or\\.v.*shf\\.w.*0x4e.*or\\.v.*copy_.\\.b.*or_uint8_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_uint8_t:.*shf\\.b.*0xb1.*xor\\.v.*shf\\.h.*0xb1.*xor\\.v.*shf\\.w.*0xb1.*xor\\.v.*shf\\.w.*0x4e.*xor\\.v.*copy_.\\.b.*xor_uint8_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_uint8_t:.*shf\\.b.*0xb1.*and\\.v.*shf\\.h.*0xb1.*and\\.v.*shf\\.w.*0xb1.*and\\.v.*shf\\.w.*0x4e.*and\\.v.*copy_.\\.b.*and_uint8_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_int16_t:.*shf\\.h.*0xb1.*or\\.v.*shf\\.w.*0xb1.*or\\.v.*shf\\.w.*0x4e.*or\\.v.*copy_s\\.h.*or_int16_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_int16_t:.*shf\\.h.*0xb1.*xor\\.v.*shf\\.w.*0xb1.*xor\\.v.*shf\\.w.*0x4e.*xor\\.v.*copy_s\\.h.*xor_int16_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_int16_t:.*shf\\.h.*0xb1.*and\\.v.*shf\\.w.*0xb1.*and\\.v.*shf\\.w.*0x4e.*and\\.v.*copy_s\\.h.*and_int16_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_uint16_t:.*shf\\.h.*0xb1.*or\\.v.*shf\\.w.*0xb1.*or\\.v.*shf\\.w.*0x4e.*or\\.v.*copy_.\\.h.*or_uint16_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_uint16_t:.*shf\\.h.*0xb1.*xor\\.v.*shf\\.w.*0xb1.*xor\\.v.*shf\\.w.*0x4e.*xor\\.v.*copy_.\\.h.*xor_uint16_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_uint16_t:.*shf\\.h.*0xb1.*and\\.v.*shf\\.w.*0xb1.*and\\.v.*shf\\.w.*0x4e.*and\\.v.*copy_.\\.h.*and_uint16_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_int32_t:.*shf\\.w.*0xb1.*or\\.v.*shf\\.w.*0x4e.*or\\.v.*copy_s\\.w.*or_int32_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_int32_t:.*shf\\.w.*0xb1.*xor\\.v.*shf\\.w.*0x4e.*xor\\.v.*copy_s\\.w.*xor_int32_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_int32_t:.*shf\\.w.*0xb1.*and\\.v.*shf\\.w.*0x4e.*and\\.v.*copy_s\\.w.*and_int32_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_uint32_t:.*shf\\.w.*0xb1.*or\\.v.*shf\\.w.*0x4e.*or\\.v.*copy_.\\.w.*or_uint32_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_uint32_t:.*shf\\.w.*0xb1.*xor\\.v.*shf\\.w.*0x4e.*xor\\.v.*copy_.\\.w.*xor_uint32_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_uint32_t:.*shf\\.w.*0xb1.*and\\.v.*shf\\.w.*0x4e.*and\\.v.*copy_.\\.w.*and_uint32_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_int64_t:.*shf\\.w.*0x4e.*or\\.v.*copy_s\\.?.*or_int64_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_int64_t:.*shf\\.w.*0x4e.*xor\\.v.*copy_s\\.?.*xor_int64_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_int64_t:.*shf\\.w.*0x4e.*and\\.v.*copy_s\\.?.*and_int64_t" 1 } } */
+/* { dg-final { scan-assembler-times "or_uint64_t:.*shf\\.w.*0x4e.*or\\.v.*copy_.\\.?.*or_uint64_t" 1 } } */
+/* { dg-final { scan-assembler-times "xor_uint64_t:.*shf\\.w.*0x4e.*xor\\.v.*copy_.\\.?.*xor_uint64_t" 1 } } */
+/* { dg-final { scan-assembler-times "and_uint64_t:.*shf\\.w.*0x4e.*and\\.v.*copy_.\\.?.*and_uint64_t" 1 } } */
+
+#include <stdint.h>
+
+#define D_TY_CALC(type) /* Define array a_<type> and the min_/max_/plus_<type> reduction loops over it.  */ \
+       type a_##type[32] __attribute__ ((aligned (16))); /* 32-element, 16-byte aligned input */ \
+       type min_##type () { /* minimum of the 32 elements */ \
+               type ret = a_##type[0]; \
+               for (int i=0; i<32; i++) \
+                       ret = (ret < a_##type[i]) ? ret : a_##type[i]; \
+               return ret;     \
+       }       \
+       type max_##type () { /* maximum of the 32 elements */ \
+               type ret = a_##type[0]; \
+               for (int i=0; i<32; i++) \
+                       ret = (ret > a_##type[i]) ? ret : a_##type[i]; \
+               return ret;     \
+       }       \
+       type plus_##type () { /* sum of the 32 elements, accumulated in <type> */ \
+               type ret = 0;   \
+               for (int i=0; i<32; i++) \
+                       ret += a_##type[i]; \
+               return ret;     \
+       }
+
+#define D_TY_BIT(type) /* Define or_/and_/xor_<type> loops over a_<type>, which D_TY_CALC(type) must already have declared.  */ \
+       type or_##type () {     \
+               type ret = 0;   /* 0 is the identity for OR */ \
+               for (int i=0; i<32; i++) \
+                       ret |= a_##type[i]; \
+               return ret;     \
+       }       \
+       type and_##type () {    \
+               type ret = (type)(long long)~0LL;       /* all-ones is the identity for AND */ \
+               for (int i=0; i<32; i++) \
+                       ret &= a_##type[i]; \
+               return ret;     \
+       }       \
+       type xor_##type () {    \
+               type ret = (type)(long long)~0LL;       /* starts from all-ones, so the result is the complement of the elements' XOR */ \
+               for (int i=0; i<32; i++) \
+                       ret ^= a_##type[i]; \
+               return ret;     \
+       }
+
+#define D_TY(type) D_TY_CALC(type) D_TY_BIT(type) /* all six reductions, for integer types */
+
+D_TY (int8_t)
+D_TY (uint8_t)
+D_TY (int16_t)
+D_TY (uint16_t)
+D_TY (int32_t)
+D_TY (uint32_t)
+D_TY (int64_t)
+D_TY (uint64_t)
+D_TY_CALC (float) /* min/max/plus only: the bitwise operators do not apply to floating types */
+D_TY_CALC (double)
+
+
author	YunQiang Su <syq@gcc.gnu.org>
	Mon, 26 Aug 2024 00:45:36 +0000 (08:45 +0800)
committer	YunQiang Su <yunqiang@isrc.iscas.ac.cn>
	Tue, 3 Sep 2024 00:13:15 +0000 (08:13 +0800)
gcc/config/mips/mips-msa.md		patch \| blob \| blame \| history
gcc/config/mips/mips-protos.h		patch \| blob \| blame \| history
gcc/config/mips/mips.cc		patch \| blob \| blame \| history
gcc/config/mips/mips.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/mips/msa-reduc.c	[new file with mode: 0644]	patch \| blob