[(set_attr "move_type" "fmove")
(set_attr "mode" "<UNITMODE>")])
+;; Extract one 128-bit half of 256-bit LASX vector operand 1 into LSX-sized
+;; result operand 0.  Operand 2 is a compile-time constant selecting the
+;; half: 0 for the low half, 1 for the high half.
+(define_expand "vec_extract<mode><lasxhalf>"
+ [(match_operand:<VHMODE256_ALL> 0 "register_operand")
+ (match_operand:LASX 1 "register_operand")
+ (match_operand 2 "const_0_or_1_operand")]
+ "ISA_HAS_LASX"
+{
+ /* Replace the 0/1 selector with the PARALLEL of element indices the
+    vec_select patterns below expect, then emit the matching insn.  */
+ if (INTVAL (operands[2]))
+ {
+ operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, true);
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ {
+ operands[2] = loongarch_lsx_vec_parallel_const_half (<MODE>mode, false);
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+})
+
+;; Low-half extraction needs no instruction: the low 128 bits of an LASX
+;; register are addressable as the overlapping LSX register, so after
+;; reload this splits into a plain lowpart register copy.
+(define_insn_and_split "vec_extract_lo_<mode>"
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX 1 "register_operand" "f")
+ (match_operand:LASX 2 "vect_par_cnst_low_half")))]
+ "ISA_HAS_LASX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_lowpart (<VHMODE256_ALL>mode, operands[1]);")
+
+;; High-half extraction: xvpermi.d with immediate 0xe places the source's
+;; high two 64-bit lanes into the destination's low two lanes, so the
+;; result's low 128 bits hold the selected half.
+;; NOTE(review): move_type "fmove" is copied from neighbouring patterns —
+;; confirm it is the right type attribute for a cross-lane permute insn.
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<VHMODE256_ALL> 0 "register_operand" "=f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX 1 "register_operand" "f")
+ (match_operand:LASX 2 "vect_par_cnst_high_half")))]
+ "ISA_HAS_LASX"
+ "xvpermi.d\t%u0,%u1,0xe"
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "vec_perm<mode>"
[(match_operand:LASX 0 "register_operand")
(match_operand:LASX 1 "register_operand")
extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode);
extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode);
extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode);
+extern bool loongarch_check_vect_par_cnst_half (rtx, machine_mode, bool);
extern rtx loongarch_const_vector_vrepli (rtx, machine_mode);
extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool);
extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT);
return true;
}
+/* Return true if OP is a PARALLEL of CONST_INTs that selects, in order,
+   the HIGH (HIGH_P == true) or LOW (HIGH_P == false) half of the elements
+   of a vector of mode MODE; i.e. the ascending run BASE, BASE + 1, ...,
+   BASE + n/2 - 1, where BASE is n/2 for the high half and 0 otherwise.  */
+
+bool
+loongarch_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high_p)
+{
+  int count = XVECLEN (op, 0);
+  int nelts = GET_MODE_NUNITS (mode);
+
+  /* OP must select exactly half of MODE's elements.  */
+  if (!known_eq (nelts, count * 2))
+    return false;
+
+  /* Every entry must be a CONST_INT continuing the ascending run that
+     starts at BASE.  */
+  int base = high_p ? nelts / 2 : 0;
+  for (int i = 0; i < count; i++)
+    {
+      rtx elt = XVECEXP (op, 0, i);
+      if (!CONST_INT_P (elt) || INTVAL (elt) != base + i)
+	return false;
+    }
+
+  return true;
+}
+
rtx
loongarch_const_vector_vrepli (rtx x, machine_mode mode)
{
}
}
+/* Implement TARGET_VECTORIZE_SPLIT_REDUCTION.  Cross-lane (cross-128-bit)
+   operations are best avoided on all CPUs, so reductions on vectors wider
+   than an LSX register are performed by repeatedly folding the upper half
+   onto the lower half until the vector fits in 128 bits.  */
+
+machine_mode
+loongarch_split_reduction (machine_mode mode)
+{
+  /* Already no wider than an LSX register: nothing left to split.  */
+  if (LSX_SUPPORTED_MODE_P (mode))
+    return mode;
+
+  /* Step down to a vector of the same element type with half as many
+     elements.  */
+  scalar_mode inner = as_a <scalar_mode> (GET_MODE_INNER (mode));
+  return mode_for_vector (inner, GET_MODE_NUNITS (mode) / 2).require ();
+}
+
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
loongarch_autovectorize_vector_modes
+/* Split reductions at the 128-bit (LSX) boundary to avoid cross-lane
+   operations; see loongarch_split_reduction.  */
+#undef TARGET_VECTORIZE_SPLIT_REDUCTION
+#define TARGET_VECTORIZE_SPLIT_REDUCTION \
+  loongarch_split_reduction
+
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
return true;
})
+
+;; PARALLEL for a vec_select that selects the low half
+;; elements (indices 0 .. n/2-1, in ascending order) of a vector of MODE.
+(define_special_predicate "vect_par_cnst_low_half"
+ (match_code "parallel")
+{
+  return loongarch_check_vect_par_cnst_half (op, mode, false);
+})
+
+;; PARALLEL for a vec_select that selects the high half
+;; elements (indices n/2 .. n-1, in ascending order) of a vector of MODE.
+(define_special_predicate "vect_par_cnst_high_half"
+ (match_code "parallel")
+{
+  return loongarch_check_vect_par_cnst_half (op, mode, true);
+})
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -funsafe-math-optimizations -mlasx -fno-unroll-loops -fdump-tree-optimized" } */
+/* Each sum loop below should be vectorized with a single REDUC_PLUS
+   reduction epilogue — one per function, four in total.  */
+/* { dg-final { scan-tree-dump-times "\.REDUC_PLUS" 4 "optimized" } } */
+
+/* Define T FUNC_NAME (const T *): sum of SIZE elements of a
+   64-byte-aligned array.  */
+#define DEFINE_SUM_FUNCTION(T, FUNC_NAME, SIZE) \
+T FUNC_NAME(const T arr[]) { \
+  arr = __builtin_assume_aligned(arr, 64); \
+  T sum = 0; \
+  for (int i = 0; i < SIZE; i++) \
+    sum += arr[i]; \
+  return sum; \
+}
+
+/* Element counts 1028/1026 are presumably chosen to not be multiples of
+   the LASX lane counts (TODO confirm intent); the function names now
+   match the actual counts instead of the stale "1040" suffix.  */
+DEFINE_SUM_FUNCTION (int, sum_int_1028, 1028)
+DEFINE_SUM_FUNCTION (float, sum_float_1028, 1028)
+DEFINE_SUM_FUNCTION (long, sum_long_1026, 1026)
+DEFINE_SUM_FUNCTION (double, sum_double_1026, 1026)