V32DI V32DF
V64DI V64DF])
+; Vector modes for four vector registers
+(define_mode_iterator V_4REG [V2TI V4TI V8TI V16TI V32TI V64TI])
+(define_mode_iterator V_4REG_ALT [V2TI V4TI V8TI V16TI V32TI V64TI])
+
; Vector modes with native support
(define_mode_iterator V_noQI
[V2HI V2HF V2SI V2SF V2DI V2DF
V32SF V32DF
V64SF V64DF])
-; All of above
+; All modes in which we want to do more than just moves.
(define_mode_iterator V_ALL
[V2QI V2HI V2HF V2SI V2SF V2DI V2DF
V4QI V4HI V4HF V4SI V4SF V4DI V4DF
V32HF V32SF V32DF
V64HF V64SF V64DF])
+; All modes that need moves, including those without many insns.
+(define_mode_iterator V_MOV
+ [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
+ V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
+ V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
+ V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
+ V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
+ V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
+(define_mode_iterator V_MOV_ALT
+ [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
+ V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
+ V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
+ V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
+ V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
+ V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
+
(define_mode_attr scalar_mode
- [(QI "qi") (HI "hi") (SI "si")
+ [(QI "qi") (HI "hi") (SI "si") (TI "ti")
(HF "hf") (SF "sf") (DI "di") (DF "df")
- (V2QI "qi") (V2HI "hi") (V2SI "si")
+ (V2QI "qi") (V2HI "hi") (V2SI "si") (V2TI "ti")
(V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
- (V4QI "qi") (V4HI "hi") (V4SI "si")
+ (V4QI "qi") (V4HI "hi") (V4SI "si") (V4TI "ti")
(V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
- (V8QI "qi") (V8HI "hi") (V8SI "si")
+ (V8QI "qi") (V8HI "hi") (V8SI "si") (V8TI "ti")
(V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
- (V16QI "qi") (V16HI "hi") (V16SI "si")
+ (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
(V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
- (V32QI "qi") (V32HI "hi") (V32SI "si")
+ (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
(V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
- (V64QI "qi") (V64HI "hi") (V64SI "si")
+ (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
(V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
(define_mode_attr SCALAR_MODE
- [(QI "QI") (HI "HI") (SI "SI")
+ [(QI "QI") (HI "HI") (SI "SI") (TI "TI")
(HF "HF") (SF "SF") (DI "DI") (DF "DF")
- (V2QI "QI") (V2HI "HI") (V2SI "SI")
+ (V2QI "QI") (V2HI "HI") (V2SI "SI") (V2TI "TI")
(V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
- (V4QI "QI") (V4HI "HI") (V4SI "SI")
+ (V4QI "QI") (V4HI "HI") (V4SI "SI") (V4TI "TI")
(V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
- (V8QI "QI") (V8HI "HI") (V8SI "SI")
+ (V8QI "QI") (V8HI "HI") (V8SI "SI") (V8TI "TI")
(V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
- (V16QI "QI") (V16HI "HI") (V16SI "SI")
+ (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
(V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
- (V32QI "QI") (V32HI "HI") (V32SI "SI")
+ (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
(V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
- (V64QI "QI") (V64HI "HI") (V64SI "SI")
+ (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
(V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
(define_mode_attr vnsi
- [(QI "si") (HI "si") (SI "si")
+ [(QI "si") (HI "si") (SI "si") (TI "si")
(HF "si") (SF "si") (DI "si") (DF "si")
(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
- (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
+ (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V2TI "v2si")
(V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
- (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
+ (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") (V4TI "v4si")
(V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
- (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
+ (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") (V8TI "v8si")
(V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
- (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
+ (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
(V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
- (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
+ (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
- (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
+ (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
(define_mode_attr VnSI
- [(QI "SI") (HI "SI") (SI "SI")
+ [(QI "SI") (HI "SI") (SI "SI") (TI "SI")
(HF "SI") (SF "SI") (DI "SI") (DF "SI")
(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
- (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
+ (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V2TI "V2SI")
(V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
- (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
+ (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") (V4TI "V4SI")
(V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
- (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
+ (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") (V8TI "V8SI")
(V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
- (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
+ (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
(V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
- (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
+ (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
- (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
+ (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
(define_mode_attr vndi
[(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
- (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
+ (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") (V2TI "v2di")
(V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
- (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
+ (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") (V4TI "v4di")
(V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
- (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
+ (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") (V8TI "v8di")
(V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
- (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
+ (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
(V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
- (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
+ (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
- (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
+ (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
(define_mode_attr VnDI
[(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
- (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
+ (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") (V2TI "V2DI")
(V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
- (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
+ (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") (V4TI "V4DI")
(V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
- (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
+ (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") (V8TI "V8DI")
(V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
- (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
+ (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
(V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
- (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
+ (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
- (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
+ (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
(define_mode_attr sdwa
[(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
"" "_exec")
(define_subst "vec_merge"
- [(set (match_operand:V_ALL 0)
- (match_operand:V_ALL 1))]
+ [(set (match_operand:V_MOV 0)
+ (match_operand:V_MOV 1))]
""
[(set (match_dup 0)
- (vec_merge:V_ALL
+ (vec_merge:V_MOV
(match_dup 1)
- (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
+ (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
(define_subst "vec_merge_with_clobber"
- [(set (match_operand:V_ALL 0)
- (match_operand:V_ALL 1))
+ [(set (match_operand:V_MOV 0)
+ (match_operand:V_MOV 1))
(clobber (match_operand 2))]
""
[(set (match_dup 0)
- (vec_merge:V_ALL
+ (vec_merge:V_MOV
(match_dup 1)
- (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
+ (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))
(clobber (match_dup 2))])
(define_subst "vec_merge_with_vcc"
- [(set (match_operand:V_ALL 0)
- (match_operand:V_ALL 1))
+ [(set (match_operand:V_MOV 0)
+ (match_operand:V_MOV 1))
(set (match_operand:DI 2)
(match_operand:DI 3))]
""
[(parallel
[(set (match_dup 0)
- (vec_merge:V_ALL
+ (vec_merge:V_MOV
(match_dup 1)
- (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
+ (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 5 "gcn_exec_reg_operand" "e")))
(set (match_dup 2)
(and:DI (match_dup 3)
; gather/scatter, maskload/store, etc.
(define_expand "mov<mode>"
- [(set (match_operand:V_ALL 0 "nonimmediate_operand")
- (match_operand:V_ALL 1 "general_operand"))]
+ [(set (match_operand:V_MOV 0 "nonimmediate_operand")
+ (match_operand:V_MOV 1 "general_operand"))]
""
{
/* Bitwise reinterpret casts via SUBREG don't work with GCN vector
; A pseudo instruction that helps LRA use the "U0" constraint.
(define_insn "mov<mode>_unspec"
- [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
- (match_operand:V_ALL 1 "gcn_unspec_operand" " U"))]
+ [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
+ (match_operand:V_MOV 1 "gcn_unspec_operand" " U"))]
""
""
[(set_attr "type" "unknown")
[(set_attr "type" "vmult,vmult,vmult,*,*")
(set_attr "length" "16,16,16,16,16")])
+(define_insn "*mov<mode>_4reg"
+ [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=v")
+ (match_operand:V_4REG 1 "general_operand" "vDB"))]
+ ""
+ {
+ return "v_mov_b32\t%L0, %L1\;"
+ "v_mov_b32\t%H0, %H1\;"
+ "v_mov_b32\t%J0, %J1\;"
+ "v_mov_b32\t%K0, %K1\;";
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "16")])
+
+(define_insn "mov<mode>_exec"
+ [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, v, v, m")
+ (vec_merge:V_4REG
+ (match_operand:V_4REG 1 "general_operand" "vDB, v0, v0, m, v")
+ (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
+ " U0,vDA0,vDA0,U0,U0")
+ (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
+ (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
+ "!MEM_P (operands[0]) || REG_P (operands[1])"
+ {
+ if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
+ switch (which_alternative)
+ {
+ case 0:
+ return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
+ "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
+ case 1:
+ return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
+ "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
+ "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
+ "v_cndmask_b32\t%K0, %K2, %K1, vcc";
+ case 2:
+ return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
+ "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
+ "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
+ "v_cndmask_b32\t%K0, %K2, %K1, %3";
+ }
+ else
+ switch (which_alternative)
+ {
+ case 0:
+ return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\;"
+ "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
+ case 1:
+ return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
+ "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
+ "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
+ "v_cndmask_b32\t%K0, %K2, %K1, vcc";
+ case 2:
+ return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
+ "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
+ "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
+ "v_cndmask_b32\t%K0, %K2, %K1, %3";
+ }
+
+ return "#";
+ }
+ [(set_attr "type" "vmult,vmult,vmult,*,*")
+ (set_attr "length" "32")])
+
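As an illustration (not from the patch itself): the REGNO comparison in mov<mode>_exec exists because the source and destination quad-register groups may overlap, so the insn chooses between two emission orders to avoid overwriting source words before they are read. A minimal stand-alone sketch of that general concern, with a hypothetical helper name:

    /* Illustrative sketch only: why the copy direction depends on which
       register group starts lower.  With overlapping groups, copying in a
       fixed low-to-high order can clobber parts of the source before they
       are read -- the same reasoning that makes memmove pick a direction.  */
    static void
    copy_overlapping_parts (unsigned *dst, const unsigned *src, int nparts)
    {
      if (dst <= src)
        /* Destination starts at or below the source: low parts first.  */
        for (int i = 0; i < nparts; i++)
          dst[i] = src[i];
      else
        /* Destination starts above the source: high parts first.  */
        for (int i = nparts - 1; i >= 0; i--)
          dst[i] = src[i];
    }
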
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
[(set_attr "type" "vmult,*,*")
(set_attr "length" "8,12,12")])
+(define_insn "mov<mode>_sgprbase"
+ [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, m")
+ (unspec:V_4REG
+ [(match_operand:V_4REG 1 "general_operand" "vDB, m, v")]
+ UNSPEC_SGPRBASE))
+ (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
+ "lra_in_progress || reload_completed"
+ "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
+ #
+ #"
+ [(set_attr "type" "vmult,*,*")
+ (set_attr "length" "8,12,12")])
+
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload. It allows a reload with a scratch register.
(define_expand "reload_in<mode>"
- [(set (match_operand:V_ALL 0 "register_operand" "= v")
- (match_operand:V_ALL 1 "memory_operand" " m"))
+ [(set (match_operand:V_MOV 0 "register_operand" "= v")
+ (match_operand:V_MOV 1 "memory_operand" " m"))
(clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
""
{
; reload_out is similar to reload_in, above.
(define_expand "reload_out<mode>"
- [(set (match_operand:V_ALL 0 "memory_operand" "= m")
- (match_operand:V_ALL 1 "register_operand" " v"))
+ [(set (match_operand:V_MOV 0 "memory_operand" "= m")
+ (match_operand:V_MOV 1 "register_operand" " v"))
(clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
""
{
; Expand scalar addresses into gather/scatter patterns
(define_split
- [(set (match_operand:V_ALL 0 "memory_operand")
- (unspec:V_ALL
- [(match_operand:V_ALL 1 "general_operand")]
+ [(set (match_operand:V_MOV 0 "memory_operand")
+ (unspec:V_MOV
+ [(match_operand:V_MOV 1 "general_operand")]
UNSPEC_SGPRBASE))
(clobber (match_scratch:<VnDI> 2))]
""
})
(define_split
- [(set (match_operand:V_ALL 0 "memory_operand")
- (vec_merge:V_ALL
- (match_operand:V_ALL 1 "general_operand")
- (match_operand:V_ALL 2 "")
+ [(set (match_operand:V_MOV 0 "memory_operand")
+ (vec_merge:V_MOV
+ (match_operand:V_MOV 1 "general_operand")
+ (match_operand:V_MOV 2 "")
(match_operand:DI 3 "gcn_exec_reg_operand")))
(clobber (match_scratch:<VnDI> 4))]
""
})
(define_split
- [(set (match_operand:V_ALL 0 "nonimmediate_operand")
- (unspec:V_ALL
- [(match_operand:V_ALL 1 "memory_operand")]
+ [(set (match_operand:V_MOV 0 "nonimmediate_operand")
+ (unspec:V_MOV
+ [(match_operand:V_MOV 1 "memory_operand")]
UNSPEC_SGPRBASE))
(clobber (match_scratch:<VnDI> 2))]
""
[(set (match_dup 0)
- (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
+ (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
(mem:BLK (scratch))]
UNSPEC_GATHER))]
{
})
(define_split
- [(set (match_operand:V_ALL 0 "nonimmediate_operand")
- (vec_merge:V_ALL
- (match_operand:V_ALL 1 "memory_operand")
- (match_operand:V_ALL 2 "")
+ [(set (match_operand:V_MOV 0 "nonimmediate_operand")
+ (vec_merge:V_MOV
+ (match_operand:V_MOV 1 "memory_operand")
+ (match_operand:V_MOV 2 "")
(match_operand:DI 3 "gcn_exec_reg_operand")))
(clobber (match_scratch:<VnDI> 4))]
""
[(set (match_dup 0)
- (vec_merge:V_ALL
- (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
+ (vec_merge:V_MOV
+ (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
(mem:BLK (scratch))]
UNSPEC_GATHER)
(match_dup 2)
(set_attr "laneselect" "yes")])
(define_expand "vec_set<mode>"
- [(set (match_operand:V_ALL 0 "register_operand")
- (vec_merge:V_ALL
- (vec_duplicate:V_ALL
+ [(set (match_operand:V_MOV 0 "register_operand")
+ (vec_merge:V_MOV
+ (vec_duplicate:V_MOV
(match_operand:<SCALAR_MODE> 1 "register_operand"))
(match_dup 0)
(ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
[(set_attr "type" "vop3a")
(set_attr "length" "16")])
+(define_insn "vec_duplicate<mode><exec>"
+ [(set (match_operand:V_4REG 0 "register_operand" "= v")
+ (vec_duplicate:V_4REG
+ (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
+ ""
+ "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
+ [(set_attr "type" "mult")
+ (set_attr "length" "32")])
+
(define_insn "vec_extract<mode><scalar_mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
(vec_select:<SCALAR_MODE>
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
+(define_insn "vec_extract<mode><scalar_mode>"
+ [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
+ (vec_select:<SCALAR_MODE>
+ (match_operand:V_4REG 1 "register_operand" " v")
+ (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
+ ""
+ "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
+ [(set_attr "type" "vmult")
+ (set_attr "length" "32")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
[(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
(vec_select:V_1REG_ALT
[(set_attr "type" "vmult")
(set_attr "length" "0,8")])
-(define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
- [(match_operand:V_ALL_ALT 0 "register_operand")
- (match_operand:V_ALL 1 "register_operand")
+(define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
+ [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
+ (vec_select:V_4REG_ALT
+ (match_operand:V_4REG 1 "register_operand" " 0,v")
+ (match_operand 2 "ascending_zero_int_parallel" "")))]
+ "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
+ && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
+ "@
+ ; in-place extract %0
+ v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
+ [(set_attr "type" "vmult")
+ (set_attr "length" "0,16")])
+
+(define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
+ [(match_operand:V_MOV_ALT 0 "register_operand")
+ (match_operand:V_MOV 1 "register_operand")
(match_operand 2 "immediate_operand")]
- "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
- && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
+ "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
+ && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode"
{
- int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode);
+ int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
int firstlane = INTVAL (operands[2]) * numlanes;
rtx tmp;
if (firstlane == 0)
{
- rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode,
+ rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
rtvec_alloc (numlanes));
for (int i = 0; i < numlanes; i++)
XVECEXP (parallel, 0, i) = GEN_INT (i);
- emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop
+ emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
(operands[0], operands[1], parallel));
} else {
/* FIXME: optimize this by using DPP where available. */
- rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode);
- emit_insn (gen_vec_series<V_ALL:vnsi> (permutation,
+ rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
+ emit_insn (gen_vec_series<V_MOV:vnsi> (permutation,
GEN_INT (firstlane*4),
GEN_INT (4)));
- tmp = gen_reg_rtx (<V_ALL:MODE>mode);
- emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
- get_exec (<V_ALL:MODE>mode)));
+ tmp = gen_reg_rtx (<V_MOV:MODE>mode);
+ emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1],
+ get_exec (<V_MOV:MODE>mode)));
emit_move_insn (operands[0],
- gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
+ gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0));
}
DONE;
})
(define_expand "extract_last_<mode>"
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:DI 1 "gcn_alu_operand")
- (match_operand:V_ALL 2 "register_operand")]
+ (match_operand:V_MOV 2 "register_operand")]
"can_create_pseudo_p ()"
{
rtx dst = operands[0];
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
(match_operand:DI 2 "gcn_alu_operand")
- (match_operand:V_ALL 3 "register_operand")]
+ (match_operand:V_MOV 3 "register_operand")]
"can_create_pseudo_p ()"
{
rtx dst = operands[0];
})
(define_expand "vec_init<mode><scalar_mode>"
- [(match_operand:V_ALL 0 "register_operand")
+ [(match_operand:V_MOV 0 "register_operand")
(match_operand 1)]
""
{
DONE;
})
-(define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
- [(match_operand:V_ALL 0 "register_operand")
- (match_operand:V_ALL_ALT 1)]
- "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode
- && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)"
+(define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
+ [(match_operand:V_MOV 0 "register_operand")
+ (match_operand:V_MOV_ALT 1)]
+ "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
+ && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
{
gcn_expand_vector_init (operands[0], operands[1]);
DONE;
;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
(define_expand "gather_load<mode><vnsi>"
- [(match_operand:V_ALL 0 "register_operand")
+ [(match_operand:V_MOV 0 "register_operand")
(match_operand:DI 1 "register_operand")
(match_operand:<VnSI> 2 "register_operand")
(match_operand 3 "immediate_operand")
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
- [(set (match_operand:V_ALL 0 "register_operand")
- (unspec:V_ALL
+ [(set (match_operand:V_MOV 0 "register_operand")
+ (unspec:V_MOV
[(match_operand 1 "")
(match_operand 2 "immediate_operand")
(match_operand 3 "immediate_operand")
{})
(define_insn "gather<mode>_insn_1offset<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
- (unspec:V_ALL
+ [(set (match_operand:V_MOV 0 "register_operand" "=v")
+ (unspec:V_MOV
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
(vec_duplicate:<VnDI>
(match_operand 2 "immediate_operand" " n")))
(set_attr "length" "12")])
(define_insn "gather<mode>_insn_1offset_ds<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
- (unspec:V_ALL
+ [(set (match_operand:V_MOV 0 "register_operand" "=v")
+ (unspec:V_MOV
[(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
(vec_duplicate:<VnSI>
(match_operand 2 "immediate_operand" " n")))
(set_attr "length" "12")])
(define_insn "gather<mode>_insn_2offsets<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
- (unspec:V_ALL
+ [(set (match_operand:V_MOV 0 "register_operand" "=v")
+ (unspec:V_MOV
[(plus:<VnDI>
(plus:<VnDI>
(vec_duplicate:<VnDI>
(match_operand:<VnSI> 1 "register_operand")
(match_operand 2 "immediate_operand")
(match_operand:SI 3 "gcn_alu_operand")
- (match_operand:V_ALL 4 "register_operand")]
+ (match_operand:V_MOV 4 "register_operand")]
""
{
rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:<VnDI> 0 "")
- (match_operand:V_ALL 1 "register_operand")
+ (match_operand:V_MOV 1 "register_operand")
(match_operand 2 "immediate_operand")
(match_operand 3 "immediate_operand")]
UNSPEC_SCATTER))]
[(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
(vec_duplicate:<VnDI>
(match_operand 1 "immediate_operand" "n")))
- (match_operand:V_ALL 2 "register_operand" "v")
+ (match_operand:V_MOV 2 "register_operand" "v")
(match_operand 3 "immediate_operand" "n")
(match_operand 4 "immediate_operand" "n")]
UNSPEC_SCATTER))]
[(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
(vec_duplicate:<VnSI>
(match_operand 1 "immediate_operand" "n")))
- (match_operand:V_ALL 2 "register_operand" "v")
+ (match_operand:V_MOV 2 "register_operand" "v")
(match_operand 3 "immediate_operand" "n")
(match_operand 4 "immediate_operand" "n")]
UNSPEC_SCATTER))]
(sign_extend:<VnDI>
(match_operand:<VnSI> 1 "register_operand" " v")))
(vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
- (match_operand:V_ALL 3 "register_operand" " v")
+ (match_operand:V_MOV 3 "register_operand" " v")
(match_operand 4 "immediate_operand" " n")
(match_operand 5 "immediate_operand" " n")]
UNSPEC_SCATTER))]
})
(define_expand "maskload<mode>di"
- [(match_operand:V_ALL 0 "register_operand")
- (match_operand:V_ALL 1 "memory_operand")
+ [(match_operand:V_MOV 0 "register_operand")
+ (match_operand:V_MOV 1 "memory_operand")
(match_operand 2 "")]
""
{
})
(define_expand "maskstore<mode>di"
- [(match_operand:V_ALL 0 "memory_operand")
- (match_operand:V_ALL 1 "register_operand")
+ [(match_operand:V_MOV 0 "memory_operand")
+ (match_operand:V_MOV 1 "register_operand")
(match_operand 2 "")]
""
{
})
(define_expand "mask_gather_load<mode><vnsi>"
- [(match_operand:V_ALL 0 "register_operand")
+ [(match_operand:V_MOV 0 "register_operand")
(match_operand:DI 1 "register_operand")
(match_operand:<VnSI> 2 "register_operand")
(match_operand 3 "immediate_operand")
(match_operand:<VnSI> 1 "register_operand")
(match_operand 2 "immediate_operand")
(match_operand:SI 3 "gcn_alu_operand")
- (match_operand:V_ALL 4 "register_operand")
+ (match_operand:V_MOV 4 "register_operand")
(match_operand:DI 5 "")]
""
{
if (vgpr_2reg_mode_p (mode))
return 2;
/* TImode is used by DImode compare_and_swap. */
- if (mode == TImode)
+ if (vgpr_4reg_mode_p (mode))
return 4;
}
else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
Therefore, we restrict ourselves to aligned registers. */
return (vgpr_1reg_mode_p (mode)
|| (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
- /* TImode is used by DImode compare_and_swap. */
- || (mode == TImode
- && !((regno - FIRST_VGPR_REG) & 3)));
+ /* TImode is used by DImode compare_and_swap,
+ and by DIVMOD V64DImode libfuncs. */
+ || (!((regno - FIRST_VGPR_REG) & 3) && vgpr_4reg_mode_p (mode)));
return false;
}
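
As an illustration (not from the patch itself): the "& 1" and "& 3" masks above encode a single rule, namely that a value occupying N VGPRs (N = 1, 2 or 4) may only start at a register offset that is a multiple of N. A minimal sketch of that check, using a hypothetical helper name:

    /* Illustrative only: NREGS is assumed to be a power of two (1, 2 or 4),
       so NREGS - 1 masks the low bits that must be clear for the group to
       be naturally aligned in the register file.  */
    static inline bool
    vgpr_group_aligned_p (unsigned regno_offset, unsigned nregs)
    {
      return (regno_offset & (nregs - 1)) == 0;
    }

With nregs equal to 4 this is the same test as the new (regno - FIRST_VGPR_REG) & 3 check for the four-register modes.
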
GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
{ \
case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
+ case E_TImode: \
+ USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
default: \
break; \
} \
return NULL_RTX; \
}
+/* These have TImode support. */
+#define USE_TI(ARGS) ARGS
+GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
+GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
+
+/* These do not have TImode support. */
+#undef USE_TI
+#define USE_TI(ARGS)
GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
A(dest, addr, src, exec))
GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
A(dest, addr, as, vol))
-GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
-GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
+#undef USE_TI
#undef GEN_VNM
#undef GEN_VN
#undef GET_VN_FN
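
As an illustration (not from the patch itself): the USE_TI define/undef pair above is a small X-macro-style gate, expanding the same generator table twice, once with the TImode entries pasted in and once with them dropped. A self-contained toy version of the trick; all names here are illustrative:

    #include <cstdio>

    /* First expansion: USE_TI pastes its argument, so the TImode line
       survives in the generated function.  */
    #define USE_TI(ARGS) ARGS
    #define GEN_TABLE(NAME)            \
      static void NAME ()              \
      {                                \
        printf ("di case\n");          \
        USE_TI (printf ("ti case\n");) \
      }
    GEN_TABLE (with_ti)

    /* Second expansion: USE_TI now expands to nothing, so the TImode line
       disappears from the generated function.  */
    #undef USE_TI
    #define USE_TI(ARGS)
    #undef GEN_TABLE
    #define GEN_TABLE(NAME)            \
      static void NAME ()              \
      {                                \
        printf ("di case\n");          \
        USE_TI (printf ("ti case\n");) \
      }
    GEN_TABLE (without_ti)

    int
    main ()
    {
      with_ti ();    /* prints "di case" then "ti case" */
      without_ti (); /* prints "di case" only */
      return 0;
    }

The real GEN_VN/GEN_VNM tables use the same shape: the mov and vec_duplicate generators get a TImode case, while the arithmetic generators are expanded with the empty USE_TI and so do not.
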
CODE_FOR (PREFIX, sf) \
CODE_FOR (PREFIX, di) \
CODE_FOR (PREFIX, df) \
+ CODE_FOR (PREFIX, ti) \
static int \
get_code_for_##PREFIX (machine_mode mode) \
{ \
case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \
case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \
case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \
+ case E_TImode: return get_code_for_##PREFIX##vNti (vf); \
default: break; \
} \
\
|| mode == V4SFmode || mode == V4DFmode
|| mode == V2QImode || mode == V2HImode
|| mode == V2SImode || mode == V2DImode
- || mode == V2SFmode || mode == V2DFmode);
+ || mode == V2SFmode || mode == V2DFmode
+ /* TImode vectors are allowed to exist for divmod, but there
+ are almost no instructions defined for them, and the
+ autovectorizer does not use them. */
+ || mode == V64TImode || mode == V32TImode
+ || mode == V16TImode || mode == V8TImode
+ || mode == V4TImode || mode == V2TImode);
}
/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
O - print offset:n for data share operations.
^ - print "_co" suffix for GCN5 mnemonics
g - print "glc", if appropriate for given MEM
+ L - print low-part of a multi-reg value
+ H - print second part of a multi-reg value (high-part of 2-reg value)
+ J - print third part of a multi-reg value
+ K - print fourth part of a multi-reg value
*/
void
case 'H':
print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
return;
+ case 'J':
+ print_operand (file, gcn_operand_part (GET_MODE (x), x, 2), 0);
+ return;
+ case 'K':
+ print_operand (file, gcn_operand_part (GET_MODE (x), x, 3), 0);
+ return;
case 'R':
/* Print a scalar register number as an integer. Temporary hack. */
gcc_assert (REG_P (x));