"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; A SF/DF scalar constant or a vector constant that can be loaded into vector
+;; registers with one prefixed instruction such as XXSPLTIDP or XXSPLTIW.
+(define_constraint "eP"
+ "A constant that can be loaded into a VSX register with one prefixed insn."
+ (match_operand 0 "vsx_prefixed_constant"))
+
;; A TF/KF scalar constant or a vector constant that can load certain IEEE
;; 128-bit constants into vector registers using LXVKQ.
(define_constraint "eQ"
vec_const_128bit_type vsx_const;
if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
{
- if (constant_generates_lxvkq (&vsx_const) != 0)
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
+ if (constant_generates_xxspltiw (&vsx_const))
return true;
}
return 0;
})
+;; Return 1 if the operand is a 64-bit floating point scalar constant or a
+;; vector constant that can be loaded to a VSX register with one prefixed
+;; instruction, such as XXSPLTIDP or XXSPLTIW.
+;;
+;; In addition to regular constants, we also recognize constants formed with
+;; the VEC_DUPLICATE insn from scalar constants.
+;;
+;; We don't handle scalar integer constants here because the assumption is the
+;; normal integer constants will be loaded into GPR registers. For the
+;; constants that need to be loaded into vector registers, the instructions
+;; don't work well with TImode variables assigned a constant. This is because
+;; the 64-bit scalar constants are splatted into both halves of the register.
+
+(define_predicate "vsx_prefixed_constant"
+ (match_code "const_double,const_vector,vec_duplicate")
+{
+ /* If we can generate the constant with a few Altivec instructions, don't
+ generate a prefixed instruction. This test runs before the target checks,
+ so an easy Altivec CONST_VECTOR is always rejected. */
+ if (CONST_VECTOR_P (op) && easy_altivec_constant (op, mode))
+ return false;
+
+ /* Both prefixed instructions and VSX registers must be available before
+ the constant itself is examined. */
+ if (!TARGET_PREFIXED || !TARGET_VSX)
+ return false;
+ /* Reject OP when vec_const_128bit_to_bytes cannot describe it in VSX_CONST. */
+ vec_const_128bit_type vsx_const;
+ if (!vec_const_128bit_to_bytes (op, mode, &vsx_const))
+ return false;
+ /* NOTE(review): only XXSPLTIW is tested here; no XXSPLTIDP check is visible. */
+ if (constant_generates_xxspltiw (&vsx_const))
+ return true;
+
+ return false;
+})
+
;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
;; via the LXVKQ instruction.
vec_const_128bit_type vsx_const;
if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
{
- if (constant_generates_lxvkq (&vsx_const) != 0)
+ if (constant_generates_lxvkq (&vsx_const))
+ return true;
+
+ if (constant_generates_xxspltiw (&vsx_const))
return true;
}
extern bool prefixed_load_p (rtx_insn *);
extern bool prefixed_store_p (rtx_insn *);
extern bool prefixed_paddi_p (rtx_insn *);
+extern bool prefixed_xxsplti_p (rtx_insn *);
extern void rs6000_asm_output_opcode (FILE *);
extern void output_pcrel_opt_reloc (rtx);
extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
vec_const_128bit_type *);
extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
+extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
else if (IN_RANGE (value, -1, 0))
*num_insns_ptr = 1;
+ /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
+ single XXSPLTIW or XXSPLTIDP instruction. */
+ else if (vsx_prefixed_constant (op, mode))
+ return false;
+
+ /* Return XXSPLTIB followed by a sign extend operation to convert the
+ constant to V8HImode or V4SImode. */
else
*num_insns_ptr = 2;
operands[2] = GEN_INT (imm);
return "lxvkq %x0,%2";
}
+
+ imm = constant_generates_xxspltiw (&vsx_const);
+ if (imm)
+ {
+ operands[2] = GEN_INT (imm);
+ return "xxspltiw %x0,%2";
+ }
}
if (TARGET_P9_VECTOR
return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}
+/* Whether an instruction is a prefixed XXSPLTI* instruction. This is called
+ from the prefixed attribute processing.
+
+ Return false unless INSN is a single SET whose destination is a REG or a
+ SUBREG. When the source is an UNSPEC, return true only if its unspec
+ number is UNSPEC_XXSPLTIW, UNSPEC_XXSPLTIDP or UNSPEC_XXSPLTI32DX. For any
+ other source, return true when vec_const_128bit_to_bytes accepts it (using
+ the mode of the destination) and constant_generates_xxspltiw returns a
+ nonzero immediate for the result. */
+
+bool
+prefixed_xxsplti_p (rtx_insn *insn)
+{
+ rtx set = single_set (insn);
+ if (!set)
+ return false;
+
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+ machine_mode mode = GET_MODE (dest);
+ /* Only register (REG or SUBREG) destinations can qualify. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ return false;
+
+ if (GET_CODE (src) == UNSPEC)
+ {
+ int unspec = XINT (src, 1);
+ return (unspec == UNSPEC_XXSPLTIW
+ || unspec == UNSPEC_XXSPLTIDP
+ || unspec == UNSPEC_XXSPLTI32DX);
+ }
+ /* Not an UNSPEC: see whether SRC is a constant XXSPLTIW can generate. */
+ vec_const_128bit_type vsx_const;
+ if (vec_const_128bit_to_bytes (src, mode, &vsx_const))
+ {
+ if (constant_generates_xxspltiw (&vsx_const))
+ return true;
+ }
+
+ return false;
+}
+
/* Whether the next instruction needs a 'p' prefix issued before the
instruction is printed out. */
static bool prepend_p_to_next_insn;
return 0;
}
+/* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
+ the XXSPLTIW instruction cannot be used. Otherwise return the immediate
+ value to be used with the XXSPLTIW instruction.
+
+ Zero is returned when any of TARGET_SPLAT_WORD_CONSTANT, TARGET_PREFIXED or
+ TARGET_VSX is off, when VSX_CONST->all_words_same is not set, when
+ VSX_CONST->all_bytes_same is set, or when the sign-extended half word or
+ word satisfies EASY_VECTOR_15. Otherwise the result is
+ VSX_CONST->words[0]. */
+
+unsigned
+constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
+{
+ if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
+ return 0;
+
+ if (!vsx_const->all_words_same)
+ return 0;
+
+ /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
+ if (vsx_const->all_bytes_same)
+ return 0;
+
+ /* See if we can use VSPLTISH or VSPLTISW. The xor/subtract pairs below
+ sign-extend the 16-bit half word and the 32-bit word before they are
+ passed to EASY_VECTOR_15 (defined elsewhere; presumably the signed
+ splat-immediate range test -- confirm against its definition). */
+ if (vsx_const->all_half_words_same)
+ {
+ unsigned short h_word = vsx_const->half_words[0];
+ short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
+ if (EASY_VECTOR_15 (sign_h_word))
+ return 0;
+ }
+
+ unsigned int word = vsx_const->words[0];
+ int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ if (EASY_VECTOR_15 (sign_word))
+ return 0;
+
+ return vsx_const->words[0];
+}
+
\f
struct gcc_target targetm = TARGET_INITIALIZER;
(eq_attr "type" "integer,add")
(if_then_else (match_test "prefixed_paddi_p (insn)")
+ (const_string "yes")
+ (const_string "no"))
+
+ (eq_attr "type" "vecperm")
+ (if_then_else (match_test "prefixed_xxsplti_p (insn)")
(const_string "yes")
(const_string "no"))]
Target Var(rs6000_privileged) Init(0)
Generate code that will run in privileged state.
+msplat-word-constant
+Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the XXSPLTIW instruction.
+
mieee128-constant
Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
Generate (do not generate) code that uses the LXVKQ instruction.
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; LXVKQ
+;; LXVKQ XXSPLTI*
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- wa,
+ wa, wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- eQ,
+ eQ, eP,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecperm,
+ vecperm, vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *,
+ *, *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *,
+ *, *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- p10,
+ p10, p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
-;; LXVKQ
+;; LXVKQ XXSPLTI*
;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
- wa,
+ wa, wa,
wa, v, ?wa, v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
- eQ,
+ eQ, eP,
wE, jwM, ?jwM, W, <nW>,
v, wZ"))]
}
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
- vecperm,
+ vecperm, vecperm,
vecsimple, vecsimple, vecsimple, *, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
- *,
+ *, *,
*, *, *, 20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
- p10,
+ p10, p10,
p9v, *, <VSisa>, *, *,
*, *")])
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eP
+A scalar floating point constant or a vector constant that can be
+loaded to a VSX register with one prefixed instruction.
+
@item eQ
An IEEE 128-bit constant that can be loaded into a VSX register with
the @code{lxvkq} instruction.
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V16QI vector constants where the
+ first 4 elements are the same as the next 4 elements, etc. */
+
+vector unsigned char
+v16qi_const_1 (void)
+{
+ return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB or XXSPLTIB. */
+}
+
+vector unsigned char
+v16qi_const_2 (void)
+{
+ return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4,
+ 1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW. */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mplxv\M} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SF vector constants. */
+
+vector float
+v4sf_const_1 (void)
+{
+ return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f }; /* XXSPLTIW. */
+}
+
+vector float
+v4sf_const_nan (void)
+{
+ return (vector float) { __builtin_nanf (""),
+ __builtin_nanf (""),
+ __builtin_nanf (""),
+ __builtin_nanf ("") }; /* XXSPLTIW. */
+}
+
+vector float
+v4sf_const_inf (void)
+{
+ return (vector float) { __builtin_inff (),
+ __builtin_inff (),
+ __builtin_inff (),
+ __builtin_inff () }; /* XXSPLTIW. */
+}
+
+vector float
+v4sf_const_m0 (void)
+{
+ return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f }; /* XXSPLTIB/VSLW. */
+}
+
+vector float
+v4sf_splats_1 (void)
+{
+ return vec_splats (1.0f); /* XXSPLTIW. */
+}
+
+vector float
+v4sf_splats_nan (void)
+{
+ return vec_splats (__builtin_nanf ("")); /* XXSPLTIW. */
+}
+
+vector float
+v4sf_splats_inf (void)
+{
+ return vec_splats (__builtin_inff ()); /* XXSPLTIW. */
+}
+
+vector float
+v8hi_splats_m0 (void)
+{
+ return vec_splats (-0.0f); /* XXSPLTIB/VSLW. */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvslw\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mplxv\M} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V4SI vector constants. We make sure
+ the power9 support (XXSPLTIB/VEXTSB2W) is not done. */
+
+vector int
+v4si_const_1 (void)
+{
+ return (vector int) { 1, 1, 1, 1 }; /* VSPLTISW. */
+}
+
+vector int
+v4si_const_126 (void)
+{
+ return (vector int) { 126, 126, 126, 126 }; /* XXSPLTIW. */
+}
+
+vector int
+v4si_const_1023 (void)
+{
+ return (vector int) { 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */
+}
+
+vector int
+v4si_splats_1 (void)
+{
+ return vec_splats (1); /* VSPLTISW. */
+}
+
+vector int
+v4si_splats_126 (void)
+{
+ return vec_splats (126); /* XXSPLTIW. */
+}
+
+vector int
+v8hi_splats_1023 (void)
+{
+ return vec_splats (1023); /* XXSPLTIW. */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
+/* { dg-final { scan-assembler-not {\mvextsb2w\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mplxv\M} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+/* Test whether XXSPLTIW is generated for V8HI vector constants. We make sure
+ the power9 support (XXSPLTIB/VUPKLSB) is not done. */
+
+vector short
+v8hi_const_1 (void)
+{
+ return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 }; /* VSPLTISH. */
+}
+
+vector short
+v8hi_const_126 (void)
+{
+ return (vector short) { 126, 126, 126, 126,
+ 126, 126, 126, 126 }; /* XXSPLTIW. */
+}
+
+vector short
+v8hi_const_1023 (void)
+{
+ return (vector short) { 1023, 1023, 1023, 1023,
+ 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */
+}
+
+vector short
+v8hi_splats_1 (void)
+{
+ return vec_splats ((short)1); /* VSPLTISH. */
+}
+
+vector short
+v8hi_splats_126 (void)
+{
+ return vec_splats ((short)126); /* XXSPLTIW. */
+}
+
+vector short
+v8hi_splats_1023 (void)
+{
+ return vec_splats ((short)1023); /* XXSPLTIW. */
+}
+
+/* Test that we can optimize V8HI where all of the even elements are the same
+ and all of the odd elements are the same. */
+vector short
+v8hi_const_1023_1000 (void)
+{
+ return (vector short) { 1023, 1000, 1023, 1000,
+ 1023, 1000, 1023, 1000 }; /* XXSPLTIW. */
+}
+
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
+/* { dg-final { scan-assembler-not {\mvupklsb\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mplxv\M} } } */
return 0;
}
-/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxspltiw\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M} 3 } } */
/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */