DONE;
})
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract a vector from a vector.
+;; -------------------------------------------------------------------------
+;; TODO: This could be extended to handle arbitrary extract modes.
+;; For now this helps optimize VLS subregs like (subreg:V2DI (reg:V4DI) 16)
+;; that would otherwise need to go via memory.
+
+(define_expand "vec_extract<mode><vls_half>"
+ [(set (match_operand:<VLS_HALF> 0 "nonimmediate_operand")
+ (vec_select:<VLS_HALF>
+ (match_operand:VLS_HAS_HALF 1 "register_operand")
+ (parallel
+ [(match_operand 2 "immediate_operand")])))]
+ "TARGET_VECTOR"
+{
+ int sz = GET_MODE_NUNITS (<VLS_HALF>mode).to_constant ();
+ int part = INTVAL (operands[2]);
+
+ rtx start = GEN_INT (part * sz);
+ rtx tmp = operands[1];
+
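+  /* For a nonzero part index, slide the source down by part * sz
+     elements so that the requested half starts at element 0; the
+     result is then simply its lowpart.  */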
+ if (part != 0)
+ {
+ tmp = gen_reg_rtx (<MODE>mode);
+
+ rtx ops[] = {tmp, operands[1], start};
+ riscv_vector::emit_vlmax_insn
+ (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode),
+ riscv_vector::BINARY_OP, ops);
+ }
+
+ emit_move_insn (operands[0], gen_lowpart (<VLS_HALF>mode, tmp));
+ DONE;
+})
+
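+;; As an illustrative sketch (not part of the patch, assuming GNU vector
+;; extensions and __builtin_shufflevector, available since GCC 12), a
+;; C-level high-half extraction like the following may be represented as
+;; exactly such a subreg and thus reach this expander:
+;;
+;;   typedef long v4di __attribute__ ((vector_size (32)));
+;;   typedef long v2di __attribute__ ((vector_size (16)));
+;;
+;;   v2di
+;;   high_half (v4di x)
+;;   {
+;;     return __builtin_shufflevector (x, x, 2, 3);
+;;   }
+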
;; -------------------------------------------------------------------------
;; ---- [FP] Binary operations
;; -------------------------------------------------------------------------
if (reg_classes_intersect_p (V_REGS, rclass)
&& !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
return false;
+
+  /* Subregs of modes larger than one vector are ambiguous.
+     With rv64gcv_zvl128b, for example, a V4DImode value occupies a
+     register group of two at VLEN = 128 but a single register at
+     VLEN >= 256, so we cannot statically determine which part of it
+     a subreg refers to.  Therefore prevent such mode changes.  */
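+  /* To make the arithmetic concrete: V4DImode is 4 * 64 = 256 bits, so
+     (subreg:V2DI (reg:V4DI) 16) names bytes 16..31; at VLEN = 128 those
+     bytes are the group's second register, at VLEN >= 256 the upper
+     half of a single register.  */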
+ if (reg_classes_intersect_p (V_REGS, rclass)
+ && riscv_v_ext_vls_mode_p (from)
+ && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from)))
+ return false;
+
return !reg_classes_intersect_p (FP_REGS, rclass);
}
(define_mode_attr VSIX16 [
(RVVMF2SI "RVVM8SI")
])
+
+(define_mode_iterator VLS_HAS_HALF [
+ (V2QI "riscv_vector::vls_mode_valid_p (V2QImode)")
+ (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
+ (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
+ (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
+ (V2HI "riscv_vector::vls_mode_valid_p (V2HImode)")
+ (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
+ (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)")
+ (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)")
+ (V2SI "riscv_vector::vls_mode_valid_p (V2SImode)")
+ (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)")
+ (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)")
+ (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64")
+ (V2DI "riscv_vector::vls_mode_valid_p (V2DImode) && TARGET_VECTOR_ELEN_64")
+ (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+ (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
+ (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
+ (V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+ (V2DF "riscv_vector::vls_mode_valid_p (V2DFmode) && TARGET_VECTOR_ELEN_FP_64")
+ (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64")
+ (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64")
+ (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
+ (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
+ (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128")
+ (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256")
+ (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512")
+ (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024")
+ (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048")
+ (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096")
+ (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64")
+ (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128")
+ (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256")
+ (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512")
+ (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024")
+ (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048")
+ (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096")
+ (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128")
+ (V64SI "riscv_vector::vls_mode_valid_p (V64SImode) && TARGET_MIN_VLEN >= 256")
+ (V128SI "riscv_vector::vls_mode_valid_p (V128SImode) && TARGET_MIN_VLEN >= 512")
+ (V256SI "riscv_vector::vls_mode_valid_p (V256SImode) && TARGET_MIN_VLEN >= 1024")
+ (V512SI "riscv_vector::vls_mode_valid_p (V512SImode) && TARGET_MIN_VLEN >= 2048")
+ (V1024SI "riscv_vector::vls_mode_valid_p (V1024SImode) && TARGET_MIN_VLEN >= 4096")
+ (V32DI "riscv_vector::vls_mode_valid_p (V32DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
+ (V64DI "riscv_vector::vls_mode_valid_p (V64DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
+ (V128DI "riscv_vector::vls_mode_valid_p (V128DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DI "riscv_vector::vls_mode_valid_p (V256DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DI "riscv_vector::vls_mode_valid_p (V512DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
+ (V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256")
+ (V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512")
+ (V256SF "riscv_vector::vls_mode_valid_p (V256SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 1024")
+ (V512SF "riscv_vector::vls_mode_valid_p (V512SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 2048")
+ (V1024SF "riscv_vector::vls_mode_valid_p (V1024SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
+ (V32DF "riscv_vector::vls_mode_valid_p (V32DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256")
+ (V64DF "riscv_vector::vls_mode_valid_p (V64DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 512")
+ (V128DF "riscv_vector::vls_mode_valid_p (V128DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DF "riscv_vector::vls_mode_valid_p (V256DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DF "riscv_vector::vls_mode_valid_p (V512DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096")
+])
+
+(define_mode_attr VLS_HALF [
+ (V2QI "V1QI")
+ (V4QI "V2QI")
+ (V8QI "V4QI")
+ (V16QI "V8QI")
+ (V32QI "V16QI")
+ (V64QI "V32QI")
+ (V128QI "V64QI")
+ (V256QI "V128QI")
+ (V512QI "V256QI")
+ (V1024QI "V512QI")
+ (V2048QI "V1024QI")
+ (V4096QI "V2048QI")
+
+ (V2HI "V1HI")
+ (V4HI "V2HI")
+ (V8HI "V4HI")
+ (V16HI "V8HI")
+ (V32HI "V16HI")
+ (V64HI "V32HI")
+ (V128HI "V64HI")
+ (V256HI "V128HI")
+ (V512HI "V256HI")
+ (V1024HI "V512HI")
+ (V2048HI "V1024HI")
+
+ (V2SI "V1SI")
+ (V4SI "V2SI")
+ (V8SI "V4SI")
+ (V16SI "V8SI")
+ (V32SI "V16SI")
+ (V64SI "V32SI")
+ (V128SI "V64SI")
+ (V256SI "V128SI")
+ (V512SI "V256SI")
+ (V1024SI "V512SI")
+
+ (V2DI "V1DI")
+ (V4DI "V2DI")
+ (V8DI "V4DI")
+ (V16DI "V8DI")
+ (V32DI "V16DI")
+ (V64DI "V32DI")
+ (V128DI "V64DI")
+ (V256DI "V128DI")
+ (V512DI "V256DI")
+
+ (V2SF "V1SF")
+ (V4SF "V2SF")
+ (V8SF "V4SF")
+ (V16SF "V8SF")
+ (V32SF "V16SF")
+ (V64SF "V32SF")
+ (V128SF "V64SF")
+ (V256SF "V128SF")
+ (V512SF "V256SF")
+ (V1024SF "V512SF")
+
+ (V2DF "V1DF")
+ (V4DF "V2DF")
+ (V8DF "V4DF")
+ (V16DF "V8DF")
+ (V32DF "V16DF")
+ (V64DF "V32DF")
+ (V128DF "V64DF")
+ (V256DF "V128DF")
+ (V512DF "V256DF")
+])
+
+(define_mode_attr vls_half [
+ (V2QI "v1qi")
+ (V4QI "v2qi")
+ (V8QI "v4qi")
+ (V16QI "v8qi")
+ (V32QI "v16qi")
+ (V64QI "v32qi")
+ (V128QI "v64qi")
+ (V256QI "v128qi")
+ (V512QI "v256qi")
+ (V1024QI "v512qi")
+ (V2048QI "v1024qi")
+ (V4096QI "v2048qi")
+
+ (V2HI "v1hi")
+ (V4HI "v2hi")
+ (V8HI "v4hi")
+ (V16HI "v8hi")
+ (V32HI "v16hi")
+ (V64HI "v32hi")
+ (V128HI "v64hi")
+ (V256HI "v128hi")
+ (V512HI "v256hi")
+ (V1024HI "v512hi")
+ (V2048HI "v1024hi")
+
+ (V2SI "v1si")
+ (V4SI "v2si")
+ (V8SI "v4si")
+ (V16SI "v8si")
+ (V32SI "v16si")
+ (V64SI "v32si")
+ (V128SI "v64si")
+ (V256SI "v128si")
+ (V512SI "v256si")
+ (V1024SI "v512si")
+
+ (V2DI "v1di")
+ (V4DI "v2di")
+ (V8DI "v4di")
+ (V16DI "v8di")
+ (V32DI "v16di")
+ (V64DI "v32di")
+ (V128DI "v64di")
+ (V256DI "v128di")
+ (V512DI "v256di")
+
+ (V2SF "v1sf")
+ (V4SF "v2sf")
+ (V8SF "v4sf")
+ (V16SF "v8sf")
+ (V32SF "v16sf")
+ (V64SF "v32sf")
+ (V128SF "v64sf")
+ (V256SF "v128sf")
+ (V512SF "v256sf")
+ (V1024SF "v512sf")
+
+ (V2DF "v1df")
+ (V4DF "v2df")
+ (V8DF "v4df")
+ (V16DF "v8df")
+ (V32DF "v16df")
+ (V64DF "v32df")
+ (V128DF "v64df")
+ (V256DF "v128df")
+ (V512DF "v256df")
+])
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v } */
+/* { dg-require-effective-target rvv_zvl256b_ok } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */
+
+#include "pr116086-2.c"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */
+
+long a;
+long b;
+long c[80];
+int main() {
+ for (int d = 0; d < 16; d++)
+ c[d] = a;
+ for (int d = 16; d < 80; d++)
+ c[d] = c[d - 2];
+ for (int d = 0; d < 80; d += 8)
+ b += c[d];
+ if (b != 0)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "vmv1r" 0 } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v } */
+/* { dg-require-effective-target rvv_zvl256b_ok } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */
+
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+typedef struct
+{
+ uint64_t length;
+ uint64_t state[8];
+ uint32_t curlen;
+ unsigned char buf[128];
+} sha512_state;
+
+static uint64_t load64(const unsigned char* y)
+{
+ uint64_t res = 0;
+ for(int i = 0; i != 8; ++i)
+ res |= (uint64_t)(y[i]) << ((7-i) * 8);
+ return res;
+}
+
+static const uint64_t K[80] =
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+__attribute__ ((noipa))
+static void sha_compress(sha512_state *md, const unsigned char *buf)
+{
+ uint64_t S[8], W[80];
+
+ for(int i = 0; i < 8; i++)
+ S[i] = 0;
+
+  // Copy 1024 bits of the buffer into W[0..15]
+ for(int i = 0; i < 16; i++)
+ W[i] = load64(buf + (8*i));
+
+ // Fill W[16..79]
+ for(int i = 16; i < 80; i++)
+ W[i] = W[i - 2] + W[i - 7] + W[i - 15] + W[i - 16];
+
+ S[7] = W[72];
+
+ // Feedback
+ for(int i = 0; i < 8; i++)
+ md->state[i] = md->state[i] + S[i];
+}
+
+int main ()
+{
+ sha512_state md;
+ md.curlen = 0;
+ md.length = 0;
+ md.state[0] = 0;
+ md.state[1] = 0;
+ md.state[2] = 0;
+ md.state[3] = 0;
+ md.state[4] = 0;
+ md.state[5] = 0;
+ md.state[6] = 0;
+ md.state[7] = 0;
+
+ for (int i = 0; i < 128; i++)
+ md.buf[i] = 0;
+
+ md.buf[md.curlen++] = (unsigned char)0x80;
+
+ sha_compress (&md, md.buf);
+
+ if (md.state[7] != 0x8000000000000000ULL)
+ __builtin_abort ();
+
+ return 0;
+}
}]
}
+# Return 1 if the target runtime has a vector length of exactly 256 bits
+# (i.e. a VLENB of 32), 0 otherwise.
+# Cache the result.
+
+proc check_effective_target_rvv_zvl256b_ok { } {
+ # Check if the target has a VLENB of 32.
+ set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
+ return [check_runtime ${gcc_march}_exec {
+ int main()
+ {
+ int vlenb = 0;
+ asm ("csrr %0,vlenb" : "=r" (vlenb) : : );
+ if (vlenb == 32)
+ return 1;
+ return 0;
+ }
+ } "-march=${gcc_march}"]
+}
+
+# Return 1 if the target runtime has a vector length of exactly 512 bits
+# (i.e. a VLENB of 64), 0 otherwise.
+# Cache the result.
+
+proc check_effective_target_rvv_zvl512b_ok { } {
+ # Check if the target has a VLENB of 64.
+ set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
+ return [check_runtime ${gcc_march}_exec {
+ int main()
+ {
+ int vlenb = 0;
+ asm ("csrr %0,vlenb" : "=r" (vlenb) : : );
+ if (vlenb == 64)
+ return 1;
+ return 0;
+ }
+ } "-march=${gcc_march}"]
+}
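+
+# A test that needs a specific run-time VLEN gates itself on these
+# effective targets, as the pr116086 run tests above do, e.g.:
+#   /* { dg-require-effective-target rvv_zvl256b_ok } */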
+
# Return 1 if the target arch supports the Zvfh extension, 0 otherwise.
# Cache the result.