DONE;
}
)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sign and zero extension
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vzext.vf[2|4|8]
+;; - vsext.vf[2|4|8]
+;; -------------------------------------------------------------------------
+
+(define_expand "<optab><v_double_trunc><mode>2"
+ [(set (match_operand:VWEXTI 0 "register_operand")
+ (any_extend:VWEXTI
+ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")))]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred_vf2 (<CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
+ DONE;
+})
+
+(define_expand "<optab><v_quad_trunc><mode>2"
+ [(set (match_operand:VQEXTI 0 "register_operand")
+ (any_extend:VQEXTI
+ (match_operand:<V_QUAD_TRUNC> 1 "register_operand")))]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred_vf4 (<CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
+ DONE;
+})
+
+(define_expand "<optab><v_oct_trunc><mode>2"
+ [(set (match_operand:VOEXTI 0 "register_operand")
+ (any_extend:VOEXTI
+ (match_operand:<V_OCT_TRUNC> 1 "register_operand")))]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred_vf8 (<CODE>, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Truncation
+;; -------------------------------------------------------------------------
+;; - vncvt.x.x.w
+;; -------------------------------------------------------------------------
+(define_expand "trunc<mode><v_double_trunc>2"
+ [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+ (truncate:<V_DOUBLE_TRUNC>
+ (match_operand:VWEXTI 1 "register_operand")))]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred_trunc (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Truncation to a mode whose inner mode size is a quarter of mode's.
+;; We emulate this with two consecutive vncvts.
+;; -------------------------------------------------------------------------
+(define_expand "trunc<mode><v_quad_trunc>2"
+ [(set (match_operand:<V_QUAD_TRUNC> 0 "register_operand")
+ (truncate:<V_QUAD_TRUNC>
+ (match_operand:VQEXTI 1 "register_operand")))]
+ "TARGET_VECTOR"
+{
+ rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+ rtx opshalf[] = {half, operands[1]};
+ insn_code icode = code_for_pred_trunc (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, opshalf);
+
+ rtx ops[] = {operands[0], half};
+ icode = code_for_pred_trunc (<V_DOUBLE_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Truncation to a mode whose inner mode size is an eighth of mode's.
+;; We emulate this with three consecutive vncvts.
+;; -------------------------------------------------------------------------
+(define_expand "trunc<mode><v_oct_trunc>2"
+ [(set (match_operand:<V_OCT_TRUNC> 0 "register_operand")
+ (truncate:<V_OCT_TRUNC>
+ (match_operand:VOEXTI 1 "register_operand")))]
+ "TARGET_VECTOR"
+{
+ rtx half = gen_reg_rtx (<V_DOUBLE_TRUNC>mode);
+ rtx opshalf[] = {half, operands[1]};
+ insn_code icode = code_for_pred_trunc (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, opshalf);
+
+ rtx quarter = gen_reg_rtx (<V_QUAD_TRUNC>mode);
+ rtx opsquarter[] = {quarter, half};
+ icode = code_for_pred_trunc (<V_DOUBLE_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, opsquarter);
+
+ rtx ops[] = {operands[0], quarter};
+ icode = code_for_pred_trunc (<V_QUAD_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops);
+ DONE;
+})
FRM_RMM = 0b100,
FRM_DYN = 0b111
};
+
+opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
+ poly_uint64);
+unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
}
/* We classify builtin types into two classes:
return icode;
}
+/* This hook gives the vectorizer more vector mode options. We want it to not
+ only try modes with the maximum number of units a full vector can hold but
+ for example also half the number of units for smaller element sizes.
+ Such vectors can be promoted to a full vector of widened elements
+ (still with the same number of elements, essentially vectorizing at a
+ fixed number of units rather than a fixed number of bytes). */
+unsigned int
+autovectorize_vector_modes (vector_modes *modes, bool)
+{
+ if (autovec_use_vlmax_p ())
+ {
+ /* TODO: We will support RVV VLS auto-vectorization mode in the future. */
+ poly_uint64 full_size
+ = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
+
+ /* Start with a VNxYYQImode where YY is the number of units that
+ fit a whole vector.
+ Then try YY = nunits / 2, nunits / 4 and nunits / 8 which
+ is guided by the extensions we have available (vf2, vf4 and vf8).
+
+ - full_size: Try using full vectors for all element types.
+ - full_size / 2:
+ Try using 16-bit containers for 8-bit elements and full vectors
+ for wider elements.
+ - full_size / 4:
+ Try using 32-bit containers for 8-bit and 16-bit elements and
+ full vectors for wider elements.
+ - full_size / 8:
+ Try using 64-bit containers for all element types. */
+ static const int rvv_factors[] = {1, 2, 4, 8};
+ for (unsigned int i = 0; i < sizeof (rvv_factors) / sizeof (int); i++)
+ {
+ poly_uint64 units;
+ machine_mode mode;
+ if (can_div_trunc_p (full_size, rvv_factors[i], &units)
+ && get_vector_mode (QImode, units).exists (&mode))
+ modes->safe_push (mode);
+ }
+ }
+ return 0;
+}
+
+/* If the given VECTOR_MODE is an RVV mode, first get the largest number
+ of units that fit into a full vector at the given ELEMENT_MODE.
+ We will have the vectorizer call us with a successively decreasing
+ number of units (as specified in autovectorize_vector_modes).
+ The starting mode is always the one specified by preferred_simd_mode. */
+opt_machine_mode
+vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
+ poly_uint64 nunits)
+{
+ /* TODO: We will support RVV VLS auto-vectorization mode in the future. */
+ poly_uint64 min_units;
+ if (autovec_use_vlmax_p () && riscv_v_ext_vector_mode_p (vector_mode)
+ && multiple_p (BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul),
+ GET_MODE_SIZE (element_mode), &min_units))
+ {
+ machine_mode rvv_mode;
+ if (maybe_ne (nunits, 0U))
+ {
+ /* If we were given a number of units NUNITS, try to find an
+ RVV vector mode of inner mode ELEMENT_MODE with the same
+ number of units. */
+ if (multiple_p (min_units, nunits)
+ && get_vector_mode (element_mode, nunits).exists (&rvv_mode))
+ return rvv_mode;
+ }
+ else
+ {
+ /* Look for a vector mode with the same number of units as the
+ VECTOR_MODE we were given. We keep track of the minimum
+ number of units so far which determines the smallest necessary
+ but largest possible, suitable mode for vectorization. */
+ min_units = ordered_min (min_units, GET_MODE_SIZE (vector_mode));
+ if (get_vector_mode (element_mode, min_units).exists (&rvv_mode))
+ return rvv_mode;
+ }
+ }
+
+ return default_vectorize_related_mode (vector_mode, element_mode, nunits);
+}
+
/* Expand an RVV comparison. */
void
return n;
}
+/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
+unsigned int
+riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
+{
+ if (TARGET_VECTOR)
+ return riscv_vector::autovectorize_vector_modes (modes, all);
+
+ return default_autovectorize_vector_modes (modes, all);
+}
+
+/* Implement TARGET_VECTORIZE_RELATED_MODE. */
+opt_machine_mode
+riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
+ poly_uint64 nunits)
+{
+ if (TARGET_VECTOR)
+ return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
+ nunits);
+ return default_vectorize_related_mode (vector_mode, element_mode, nunits);
+}
+
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY riscv_mode_priority
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
+ riscv_autovectorize_vector_modes
+
+#undef TARGET_VECTORIZE_RELATED_MODE
+#define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
(VNx16HI "VNx16QI") (VNx32HI "VNx32QI") (VNx64HI "VNx64QI")
(VNx1SI "VNx1HI") (VNx2SI "VNx2HI") (VNx4SI "VNx4HI") (VNx8SI "VNx8HI")
(VNx16SI "VNx16HI") (VNx32SI "VNx32HI")
- (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI") (VNx16DI "VNx16SI")
- (VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF") (VNx16DF "VNx16SF")
+ (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI")
+ (VNx16DI "VNx16SI")
+ (VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF")
+ (VNx16DF "VNx16SF")
])
(define_mode_attr V_QUAD_TRUNC [
])
(define_mode_attr V_OCT_TRUNC [
- (VNx1DI "VNx1QI") (VNx2DI "VNx2QI") (VNx4DI "VNx4QI") (VNx8DI "VNx8QI") (VNx16DI "VNx16QI")
+ (VNx1DI "VNx1QI") (VNx2DI "VNx2QI") (VNx4DI "VNx4QI") (VNx8DI "VNx8QI")
+ (VNx16DI "VNx16QI")
+])
+
+;; The same truncation mode attributes, but in lower case for pattern names.
+(define_mode_attr v_double_trunc [
+ (VNx1HI "vnx1qi") (VNx2HI "vnx2qi") (VNx4HI "vnx4qi") (VNx8HI "vnx8qi")
+ (VNx16HI "vnx16qi") (VNx32HI "vnx32qi") (VNx64HI "vnx64qi")
+ (VNx1SI "vnx1hi") (VNx2SI "vnx2hi") (VNx4SI "vnx4hi") (VNx8SI "vnx8hi")
+ (VNx16SI "vnx16hi") (VNx32SI "vnx32hi")
+ (VNx1DI "vnx1si") (VNx2DI "vnx2si") (VNx4DI "vnx4si") (VNx8DI "vnx8si")
+ (VNx16DI "vnx16si")
+ (VNx1DF "vnx1sf") (VNx2DF "vnx2sf") (VNx4DF "vnx4sf") (VNx8DF "vnx8sf")
+ (VNx16DF "vnx16sf")
+])
+
+(define_mode_attr v_quad_trunc [
+ (VNx1SI "vnx1qi") (VNx2SI "vnx2qi") (VNx4SI "vnx4qi") (VNx8SI "vnx8qi")
+ (VNx16SI "vnx16qi") (VNx32SI "vnx32qi")
+ (VNx1DI "vnx1hi") (VNx2DI "vnx2hi") (VNx4DI "vnx4hi") (VNx8DI "vnx8hi")
+ (VNx16DI "vnx16hi")
+])
+
+(define_mode_attr v_oct_trunc [
+ (VNx1DI "vnx1qi") (VNx2DI "vnx2qi") (VNx4DI "vnx4qi") (VNx8DI "vnx8qi")
+ (VNx16DI "vnx16qi")
])
(define_mode_attr VINDEX_DOUBLE_TRUNC [
/* { dg-final { scan-assembler {\tvsll\.vv} } } */
/* { dg-final { scan-assembler {\tvsrl\.vv} } } */
/* { dg-final { scan-assembler {\tvsra\.vv} } } */
-
#include "shift-template.h"
-/* TODO: For int16_t and uint16_t we need widening/promotion patterns.
- Therefore, expect only 4 vsll.vv instead of 6 for now. */
-
-/* { dg-final { scan-assembler-times {\tvsll\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvsll\.vv} 6 } } */
/* { dg-final { scan-assembler-times {\tvsrl\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvsra\.vv} 3 } } */
a##TYPE[i] = VAL * 3; \
b##TYPE[i] = VAL; \
} \
- vadd_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
+ vdiv_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \
for (int i = 0; i < SZ; i++) \
assert (a##TYPE[i] == 3);
TYPE as##TYPE[SZ]; \
for (int i = 0; i < SZ; i++) \
as##TYPE[i] = VAL * 5; \
- vadds_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
+ vdivs_##TYPE (as##TYPE, as##TYPE, VAL, SZ); \
for (int i = 0; i < SZ; i++) \
assert (as##TYPE[i] == 5);
#include "vdiv-template.h"
-/* TODO: Implement vector type promotion. We should have 6 vdiv.vv here. */
+/* Currently we use an epilogue loop which also contains vdivs. Therefore we
+ expect 10 vdiv[u]s instead of 6. */
-/* { dg-final { scan-assembler-times {\tvdiv\.vv} 4 } } */
-/* { dg-final { scan-assembler-times {\tvdivu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 10 } } */
#include "vdiv-template.h"
-/* TODO: Implement vector type promotion. We should have 6 vdiv.vv here. */
+/* Currently we use an epilogue loop which also contains vdivs. Therefore we
+ expect 10 vdiv[u]s instead of 6. */
-/* { dg-final { scan-assembler-times {\tvdiv\.vv} 4 } } */
-/* { dg-final { scan-assembler-times {\tvdivu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 10 } } */
#define TEST_TYPE(TYPE) \
__attribute__((noipa)) \
- void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n) \
+ void vdiv_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i] / b[i]; \
#define TEST2_TYPE(TYPE) \
__attribute__((noipa)) \
- void vadds_##TYPE (TYPE *dst, TYPE *a, TYPE b, int n) \
+ void vdivs_##TYPE (TYPE *dst, TYPE *a, TYPE b, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i] / b; \
}
-/* *int8_t not autovec currently. */
#define TEST_ALL() \
TEST_TYPE(int16_t) \
TEST_TYPE(uint16_t) \
#include "vrem-template.h"
-/* TODO: Implement vector type promotion. We should have 6 vrem.vv here. */
+/* Currently we use an epilogue loop which also contains vrems. Therefore we
+ expect 10 vrem[u]s instead of 6. */
-/* { dg-final { scan-assembler-times {\tvrem\.vv} 5 } } */
-/* { dg-final { scan-assembler-times {\tvremu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvrem\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvremu\.vv} 10 } } */
#include "vrem-template.h"
-/* TODO: Implement vector type promotion. We should have 6 vrem.vv here. */
+/* Currently we use an epilogue loop which also contains vrems. Therefore we
+ expect 10 vrem[u]s instead of 6. */
-/* { dg-final { scan-assembler-times {\tvrem\.vv} 5 } } */
-/* { dg-final { scan-assembler-times {\tvremu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvrem\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {\tvremu\.vv} 10 } } */
--- /dev/null
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vncvt-template.h"
+
+#include <assert.h>
+
+#define SZ 256
+
+#define RUN(TYPE1,TYPE2) \
+ TYPE1 src##TYPE1##TYPE2[SZ]; \
+ TYPE2 dst##TYPE1##TYPE2[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ src##TYPE1##TYPE2[i] = i; \
+ dst##TYPE1##TYPE2[i] = -1; \
+ } \
+ vncvt_##TYPE1##TYPE2 (dst##TYPE1##TYPE2, \
+ src##TYPE1##TYPE2, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (dst##TYPE1##TYPE2[i] == i);
+
+
+#define RUN_ALL() \
+ RUN(uint16_t, uint8_t) \
+ RUN(uint32_t, uint8_t) \
+ RUN(uint64_t, uint8_t) \
+ RUN(uint32_t, uint16_t) \
+ RUN(uint64_t, uint16_t) \
+ RUN(uint64_t, uint32_t) \
+
+int main ()
+{
+ RUN_ALL()
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vncvt-template.h"
+
+/* { dg-final { scan-assembler-times {\tvncvt.x.x.w} 10 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vncvt-template.h"
+
+/* { dg-final { scan-assembler-times {\tvncvt.x.x.w} 10 } } */
--- /dev/null
+#include <stdint-gcc.h>
+
+#define TEST(TYPE1, TYPE2) \
+ __attribute__((noipa)) \
+ void vncvt_##TYPE1##TYPE2 (TYPE2 *dst, TYPE1 *a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = (TYPE1)a[i]; \
+ }
+
+#define TEST_ALL() \
+ TEST(uint16_t, uint8_t) \
+ TEST(uint32_t, uint8_t) \
+ TEST(uint32_t, uint16_t) \
+ TEST(uint64_t, uint8_t) \
+ TEST(uint64_t, uint16_t) \
+ TEST(uint64_t, uint32_t) \
+
+TEST_ALL()
--- /dev/null
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vsext-template.h"
+
+#include <assert.h>
+
+#define SZ 256
+
+#define RUN(TYPE1,TYPE2) \
+ TYPE1 src##TYPE1##TYPE2[SZ]; \
+ TYPE2 dst##TYPE1##TYPE2[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ src##TYPE1##TYPE2[i] = i - 128; \
+ dst##TYPE1##TYPE2[i] = 0; \
+ } \
+ vsext_##TYPE1##TYPE2 (dst##TYPE1##TYPE2, \
+ src##TYPE1##TYPE2, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (dst##TYPE1##TYPE2[i] == i - 128);
+
+
+#define RUN_ALL() \
+ RUN(int8_t, int16_t) \
+ RUN(int8_t, int32_t) \
+ RUN(int8_t, int64_t) \
+ RUN(int16_t, int32_t) \
+ RUN(int16_t, int64_t) \
+ RUN(int32_t, int64_t) \
+
+int main ()
+{
+ RUN_ALL()
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vsext-template.h"
+
+/* { dg-final { scan-assembler-times {\tvsext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vsext-template.h"
+
+/* { dg-final { scan-assembler-times {\tvsext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf4} 2 } } */
+/* { dg-final { scan-assembler-times {\tvsext\.vf8} 1 } } */
--- /dev/null
+#include <stdint-gcc.h>
+
+#define TEST(TYPE1, TYPE2) \
+ __attribute__((noipa)) \
+ void vsext_##TYPE1##TYPE2 (TYPE2 *dst, TYPE1 *a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = (TYPE1)a[i]; \
+ }
+
+#define TEST_ALL() \
+ TEST(int8_t, int16_t) \
+ TEST(int8_t, int32_t) \
+ TEST(int8_t, int64_t) \
+ TEST(int16_t, int32_t) \
+ TEST(int16_t, int64_t) \
+ TEST(int32_t, int64_t) \
+
+TEST_ALL()
--- /dev/null
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vzext-template.h"
+
+#include <assert.h>
+
+#define SZ 256
+
+#define RUN(TYPE1,TYPE2) \
+ TYPE1 src##TYPE1##TYPE2[SZ]; \
+ TYPE2 dst##TYPE1##TYPE2[SZ]; \
+ for (int i = 0; i < SZ; i++) \
+ { \
+ src##TYPE1##TYPE2[i] = i; \
+ dst##TYPE1##TYPE2[i] = -1; \
+ } \
+ vzext_##TYPE1##TYPE2 (dst##TYPE1##TYPE2, \
+ src##TYPE1##TYPE2, SZ); \
+ for (int i = 0; i < SZ; i++) \
+ assert (dst##TYPE1##TYPE2[i] == i);
+
+
+#define RUN_ALL() \
+ RUN(uint8_t, uint16_t) \
+ RUN(uint8_t, uint32_t) \
+ RUN(uint8_t, uint64_t) \
+ RUN(uint16_t, uint32_t) \
+ RUN(uint16_t, uint64_t) \
+ RUN(uint32_t, uint64_t) \
+
+int main ()
+{
+ RUN_ALL()
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vzext-template.h"
+
+/* { dg-final { scan-assembler-times {\tvzext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8} 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include "vzext-template.h"
+
+/* { dg-final { scan-assembler-times {\tvzext\.vf2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf4} 2 } } */
+/* { dg-final { scan-assembler-times {\tvzext\.vf8} 1 } } */
--- /dev/null
+#include <stdint-gcc.h>
+
+#define TEST(TYPE1, TYPE2) \
+ __attribute__((noipa)) \
+ void vzext_##TYPE1##TYPE2 (TYPE2 *dst, TYPE1 *a, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ dst[i] = (TYPE1)a[i]; \
+ }
+
+#define TEST_ALL() \
+ TEST(uint8_t, uint16_t) \
+ TEST(uint8_t, uint32_t) \
+ TEST(uint8_t, uint64_t) \
+ TEST(uint16_t, uint32_t) \
+ TEST(uint16_t, uint64_t) \
+ TEST(uint32_t, uint64_t) \
+
+TEST_ALL()
#include "template-1.h"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
#include "template-1.h"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
#include "template-1.h"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
#include "template-1.h"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
#include "template-1.h"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
"" "$op"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
"" "$op"
+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/conversions/*.\[cS\]]] \
+ "" "$op"
}
# VLS-VLMAX tests
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
#include "riscv_vector.h"
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
#include "riscv_vector.h"
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
#include "riscv_vector.h"
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
#include "riscv_vector.h"
/* { dg-do compile } */
-/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -fno-tree-vectorize" } */
#include "riscv_vector.h"