COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
ix86_size_memcpy,
ix86_size_memset,
COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i386_memcpy,
i386_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i486_memcpy,
i486_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentiumpro_memcpy,
pentiumpro_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
geode_memcpy,
geode_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k6_memcpy,
k6_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
athlon_memcpy,
athlon_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k8_memcpy,
k8_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
amdfam10_memcpy,
amdfam10_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
bdver_memcpy,
bdver_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {5, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver1_memcpy,
znver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {10, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
We increase width to 6 for multiplications
in ix86_reassociation_width. */
6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
skylake_memcpy,
skylake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 10, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
icelake_memcpy,
icelake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
alderlake_memcpy,
alderlake_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver1_memcpy,
btver1_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver2_memcpy,
btver2_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium4_memcpy,
pentium4_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
nocona_memcpy,
nocona_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 2, /* Limit how much the autovectorizer
+ may unroll a loop. */
atom_memcpy,
atom_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
slm_memcpy,
slm_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
tremont_memcpy,
tremont_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
lujiazui_memcpy,
lujiazui_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
yongfeng_memcpy,
yongfeng_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
shijidadao_memcpy,
shijidadao_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
generic_memcpy,
generic_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
core_memcpy,
core_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */