if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR))
return false;
+ int max_lmul = TARGET_MAX_LMUL;
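+ /* When RVV_CONV_DYNAMIC is selected, expand block operations with at most
+    a single vector register, i.e. LMUL = 1.  */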
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ max_lmul = RVV_M1;
+
if (CONST_INT_P (length_in))
{
HOST_WIDE_INT length = INTVAL (length_in);
/* If the VLEN and preferred LMUL allow the entire block to be copied in
one go then no loop is needed. */
- if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
+ if (known_le (length, BYTES_PER_RISCV_VECTOR * max_lmul))
{
need_loop = false;
poly_int64 nunits;
if (need_loop)
- per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
+ per_iter = BYTES_PER_RISCV_VECTOR * max_lmul;
else
per_iter = length;
- /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by
+ /* BYTES_PER_RISCV_VECTOR * max_lmul may not be divisible by
this potential_ew. */
if (!multiple_p (per_iter, potential_ew, &nunits))
continue;
pointless.
Still, by choosing a lower LMUL factor that still allows
an entire transfer, we can reduce register pressure. */
- for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
+ for (int lmul = 1; lmul < max_lmul; lmul <<= 1)
if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
&& multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew,
&mode_units)
if (vmode != VOIDmode)
break;
- /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible
+ /* BYTES_PER_RISCV_VECTOR * max_lmul will at least be divisible
by potential_ew 1, so this should succeed eventually. */
- if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+ if (multiple_p (BYTES_PER_RISCV_VECTOR * max_lmul,
potential_ew, &mode_units)
&& riscv_vector::get_vector_mode (elem_mode,
mode_units).exists (&vmode))
}
else
{
- gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode));
+ gcc_assert (get_lmul_mode (QImode, max_lmul).exists (&vmode));
}
/* A memcpy libcall in the worst case takes 3 instructions to prepare the
unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
int lmul = TARGET_MAX_LMUL;
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul = RVV_M1;
poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
machine_mode vmode;
machine_mode mode = E_QImode;
unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
int lmul = TARGET_MAX_LMUL;
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul = RVV_M1;
poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
machine_mode vmode;
if (rvv_max_lmul != RVV_DYNAMIC)
{
lmul_out = TARGET_MAX_LMUL;
- return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8));
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul_out = RVV_M1;
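+ /* E.g. with TARGET_MIN_VLEN = 128 and LMUL = 1 the length threshold is
+    128 / 8 = 16 bytes.  */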
+ return (length <= ((lmul_out * TARGET_MIN_VLEN) / 8));
}
/* Find smallest lmul large enough for entire op. */
return mode1_size >= mode2_size ? mode1 : mode2;
}
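+/* Return the smaller of MODE1 and MODE2, measured in bits.  */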
+static machine_mode
+get_smallest_mode (machine_mode mode1, machine_mode mode2)
+{
+ unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant ();
+ unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant ();
+ return mode1_size <= mode2_size ? mode1 : mode2;
+}
+
/* Return true if OP is invariant. */
static bool
costs::compute_local_live_ranges (
loop_vec_info loop_vinfo,
const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
- hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
+ hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
+ machine_mode *smallest_mode_out)
{
machine_mode biggest_mode = QImode;
+ machine_mode smallest_mode = TImode;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
if (!program_points_per_bb.is_empty ())
{
if (variable_vectorized_p (loop, program_point.stmt_info,
*node, lhs, true))
{
- biggest_mode = get_biggest_mode (biggest_mode,
- TYPE_MODE (TREE_TYPE (lhs)));
+ biggest_mode
+ = get_biggest_mode (biggest_mode,
+ TYPE_MODE (TREE_TYPE (lhs)));
+ smallest_mode
+ = get_smallest_mode (smallest_mode,
+ TYPE_MODE (TREE_TYPE (lhs)));
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (lhs, &existed_p);
biggest_mode
= get_biggest_mode (biggest_mode,
TYPE_MODE (TREE_TYPE (var)));
+ smallest_mode
+ = get_smallest_mode (smallest_mode,
+ TYPE_MODE (TREE_TYPE (var)));
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (var, &existed_p);
(*r).second = MAX (point, (*r).second);
biggest_mode = get_biggest_mode (
biggest_mode, TYPE_MODE (TREE_TYPE (arg)));
+ smallest_mode = get_smallest_mode (
+ smallest_mode, TYPE_MODE (TREE_TYPE (arg)));
}
}
else
}
}
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
- GET_MODE_NAME (biggest_mode));
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
+ GET_MODE_NAME (biggest_mode));
+ dump_printf_loc (MSG_NOTE, vect_location, "Smallest mode = %s\n",
+ GET_MODE_NAME (smallest_mode));
+ }
+ if (smallest_mode_out)
+ *smallest_mode_out = smallest_mode;
return biggest_mode;
}
return 0;
}
+/* Compute LMUL based on the ratio of biggest to smallest type size.
+ This is used for RVV_CONV_DYNAMIC. */
+static int
+compute_lmul_from_conversion_ratio (machine_mode biggest_mode,
+ machine_mode smallest_mode)
+{
+ gcc_assert (GET_MODE_BITSIZE (biggest_mode).is_constant ());
+ gcc_assert (GET_MODE_BITSIZE (smallest_mode).is_constant ());
+
+ unsigned int biggest_size = GET_MODE_BITSIZE (biggest_mode).to_constant ();
+ unsigned int smallest_size = GET_MODE_BITSIZE (smallest_mode).to_constant ();
+
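+ /* E.g. a loop that widens int8_t elements to int32_t gives
+    biggest_size = 32 and smallest_size = 8, hence LMUL = 4
+    (within the [RVV_M1, RVV_M8] clamp below).  */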
+ int lmul = biggest_size / smallest_size;
+ lmul = std::min (lmul, (int) RVV_M8);
+ lmul = std::max (lmul, (int) RVV_M1);
+
+ return lmul;
+}
+
/* Update the live ranges according to PHI.
Loop:
}
}
-/* Compute the maximum live V_REGS. */
-bool
-costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
+/* Helper to compute live ranges, modes, and LMUL. */
+void
+costs::compute_live_ranges_and_lmul (loop_vec_info loop_vinfo,
+ hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
+ hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
+ machine_mode &biggest_mode, machine_mode &smallest_mode, int &lmul)
{
- /* Compute local program points.
- It's a fast and effective computation. */
- hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
compute_local_program_points (loop_vinfo, program_points_per_bb);
- /* Compute local live ranges. */
- hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
- machine_mode biggest_mode
- = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
- live_ranges_per_bb);
+ smallest_mode = TImode;
+ biggest_mode = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb, &smallest_mode);
- /* Update live ranges according to PHI. */
update_local_live_ranges (loop_vinfo, program_points_per_bb,
live_ranges_per_bb, &biggest_mode);
- int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul = compute_lmul_from_conversion_ratio (biggest_mode, smallest_mode);
+ else
+ lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
+
gcc_assert (lmul <= RVV_M8);
- /* TODO: We calculate the maximum live vars base on current STMTS
- sequence. We can support live range shrink if it can give us
- big improvement in the future. */
- if (lmul > RVV_M1)
- {
- if (!live_ranges_per_bb.is_empty ())
- {
- unsigned int max_nregs = 0;
- for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
- = live_ranges_per_bb.begin ();
- iter != live_ranges_per_bb.end (); ++iter)
- {
- basic_block bb = (*iter).first;
- unsigned int max_point
- = (*program_points_per_bb.get (bb)).length () + 1;
- if ((*iter).second.is_empty ())
- continue;
- /* We prefer larger LMUL unless it causes register spillings. */
- unsigned int nregs
- = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
- max_point, biggest_mode, lmul);
- if (nregs > max_nregs)
- max_nregs = nregs;
- }
- live_ranges_per_bb.empty ();
- if (max_nregs > V_REG_NUM)
- return true;
- }
- }
+}
+
+/* Helper to clean up live range data structures. */
+void
+costs::cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>>
+ &program_points_per_bb,
+ hash_map<basic_block, hash_map<tree, pair>>
+ &live_ranges_per_bb)
+{
if (!program_points_per_bb.is_empty ())
{
for (hash_map<basic_block, vec<stmt_point>>::iterator iter
}
program_points_per_bb.empty ();
}
- return false;
+ live_ranges_per_bb.empty ();
+}
+
+/* Compute LMUL for RVV_CONV_DYNAMIC mode based on conversion ratio. */
+void
+costs::compute_conversion_dynamic_lmul (loop_vec_info loop_vinfo)
+{
+ hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
+ hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
+ machine_mode biggest_mode, smallest_mode;
+ int lmul;
+
+ compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb, biggest_mode,
+ smallest_mode, lmul);
+
+ /* Store the computed LMUL and biggest mode for later comparison
+ in the cost model. */
+ m_computed_lmul_from_conv = lmul;
+ m_biggest_mode_for_conv = biggest_mode;
+
+ cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
+}
+
+/* Compute the maximum live V_REGS and check for unexpected spills. */
+bool
+costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
+{
+ hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
+ hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
+ machine_mode biggest_mode, smallest_mode;
+ int lmul;
+
+ compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb, biggest_mode,
+ smallest_mode, lmul);
+
+ /* TODO: We calculate the maximum number of live vars based on the current
+ STMT sequence. We could support live range shrinking in the future if it
+ gives a big improvement. */
+ bool has_spills = false;
+ if (lmul > RVV_M1 && !live_ranges_per_bb.is_empty ())
+ {
+ unsigned int max_nregs = 0;
+ for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
+ = live_ranges_per_bb.begin ();
+ iter != live_ranges_per_bb.end (); ++iter)
+ {
+ basic_block bb = (*iter).first;
+ unsigned int max_point
+ = (*program_points_per_bb.get (bb)).length () + 1;
+ if ((*iter).second.is_empty ())
+ continue;
+ /* We prefer a larger LMUL unless it causes register spilling. */
+ unsigned int nregs
+ = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
+ max_point, biggest_mode, lmul);
+ if (nregs > max_nregs)
+ max_nregs = nregs;
+ }
+ if (max_nregs > V_REG_NUM)
+ has_spills = true;
+ }
+
+ cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
+ return has_spills;
}
costs::costs (vec_info *vinfo, bool costing_for_scalar)
if (!post_dom_available_p)
free_dominance_info (CDI_POST_DOMINATORS);
}
+ else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ compute_conversion_dynamic_lmul (loop_vinfo);
}
/* Decide whether to use the unrolling heuristic described above
return other_prefer_unrolled;
}
}
+ else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ {
+ if (this->m_computed_lmul_from_conv > 0
+ && other->m_computed_lmul_from_conv > 0
+ && this->m_biggest_mode_for_conv != VOIDmode)
+ {
+ int this_vf = vect_vf_for_cost (this_loop_vinfo);
+ int other_vf = vect_vf_for_cost (other_loop_vinfo);
+
+ /* Get element size from the biggest mode. */
+ unsigned int element_bits
+ = GET_MODE_BITSIZE (this->m_biggest_mode_for_conv).to_constant ();
+
+ /* Estimate LMUL from VF * element_size / MIN_VLEN. */
+ int this_lmul = (this_vf * element_bits) / TARGET_MIN_VLEN;
+ int other_lmul = (other_vf * element_bits) / TARGET_MIN_VLEN;
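+ /* E.g. a candidate with VF = 16 and 32-bit widest elements on a
+    TARGET_MIN_VLEN = 128 target estimates to (16 * 32) / 128 = 4,
+    i.e. LMUL = 4.  */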
+
+ /* Clamp to valid LMUL range. */
+ this_lmul = MAX (1, MIN (this_lmul, 8));
+ other_lmul = MAX (1, MIN (other_lmul, 8));
+
+ int target_lmul = this->m_computed_lmul_from_conv;
+
+ /* Prefer the LMUL that exactly matches our computed ratio. */
+ if (this_lmul == target_lmul && other_lmul != target_lmul)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Preferring LMUL=%d loop because it matches"
+ " conversion ratio (other LMUL=%d)\n",
+ this_lmul, other_lmul);
+ return true;
+ }
+ else if (this_lmul != target_lmul && other_lmul == target_lmul)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Preferring other LMUL=%d loop because it"
+ " matches conversion ratio"
+ " (this LMUL=%d)\n", other_lmul, this_lmul);
+ return false;
+ }
+ }
+ }
else if (rvv_max_lmul == RVV_DYNAMIC)
{
if (other->m_has_unexpected_spills_p)