From: Ganesh Gopalasubramanian Date: Mon, 18 Nov 2013 09:25:21 +0000 (+0000) Subject: AMD bdver4 enablement X-Git-Tag: releases/gcc-4.9.0~2718 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ed97ad4709f095da78aa0a4f5653b9509984d579;p=thirdparty%2Fgcc.git AMD bdver4 enablement From-SVN: r204939 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 725f66d34c46..7f97e31708fe 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,34 @@ +2013-11-12 Ganesh Gopalasubramanian + + * config.gcc (i[34567]86-*-linux* | ...): Add bdver4. + (case ${target}): Add bdver4. + * config/i386/bdver3.md: Add bdver4. + * config/i386/driver-i386.c (host_detect_local_cpu): Let + -march=native recognize bdver4 processors. + * config/i386/i386-c.c (ix86_target_macros_internal): Add + bdver4 def_and_undef. + * config/i386/i386.c (struct processor_costs bdver4_cost): New. + (m_BDVER4): New definition. + (m_AMD_MULTIPLE): Includes m_BDVER4. + (processor_target_table): Add bdver4 entry. + (static const char *const cpu_names): Add bdver4 entry. + (software_prefetching_beneficial_p): Add bdver3. + (ix86_option_override_internal): Add bdver4 instruction sets. + (ix86_issue_rate): Add bdver4. + (ix86_adjust_cost): Add bdver4. + (ia32_multipass_dfa_lookahead): Add bdver4. + (enum processor_model): Add M_AMDFAM15H_BDVER4. + (struct _arch_names_table): Add M_AMDFAM15H_BDVER4. + (has_dispatch): Add bdver4. + * config/i386/i386.h (TARGET_BDVER4): New definition. + (enum target_cpu_default): Add TARGET_CPU_DEFAULT_bdver4. + (enum processor_type): Add PROCESSOR_BDVER4. + * config/i386/i386.md (define_attr "cpu"): Add bdver4. + * config/i386/i386.opt (flag_dispatch_scheduler): Add bdver4. + * gcc/doc/extend.texi: Add details about bdver4. + * gcc/doc/invoke.texi: Add details about bdver4. Add + fma4 and fsgsbase for bdver3. Add fma4 for bdver2. 
+ 2013-11-17 Ulrich Weigand * config/rs6000/rs6000.c (rs6000_emit_move): Use low word of diff --git a/gcc/config.gcc b/gcc/config.gcc index fafa8b87e07f..2907018987d8 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1398,7 +1398,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` need_64bit_isa=yes case X"${with_cpu}" in - Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1407,7 +1407,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -1519,7 +1519,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) tmake_file="$tmake_file i386/t-sol2-64" need_64bit_isa=yes case X"${with_cpu}" in - Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + 
Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1528,7 +1528,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -1604,7 +1604,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) if test x$enable_targets = xall; then tm_defines="${tm_defines} TARGET_BI_ARCH=1" case X"${with_cpu}" in - Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1613,7 +1613,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom slm core2 corei7 Xcorei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 Xcorei7-avx nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -2911,6 +2911,10 @@ case ${target} in ;; 
i686-*-* | i786-*-*) case ${target_noncanonical} in + bdver4-*) + arch=bdver4 + cpu=bdver4 + ;; bdver3-*) arch=bdver3 cpu=bdver3 @@ -3020,6 +3024,10 @@ case ${target} in ;; x86_64-*-*) case ${target_noncanonical} in + bdver4-*) + arch=bdver4 + cpu=bdver4 + ;; bdver3-*) arch=bdver3 cpu=bdver3 @@ -3658,9 +3666,10 @@ case "${target}" in ;; "" | x86-64 | generic | native \ | k8 | k8-sse3 | athlon64 | athlon64-sse3 | opteron \ - | opteron-sse3 | athlon-fx | bdver3 | bdver2 | bdver1 | btver2 \ - | btver1 | amdfam10 | barcelona | nocona | core2 | corei7 \ - | corei7-avx | core-avx-i | core-avx2 | atom | slm) + | opteron-sse3 | athlon-fx | bdver4 | bdver3 | bdver2 \ + | bdver1 | btver2 | btver1 | amdfam10 | barcelona \ + | nocona | core2 | corei7 | corei7-avx | core-avx-i \ + | core-avx2 | atom | slm) # OK ;; *) diff --git a/gcc/config/i386/bdver3.md b/gcc/config/i386/bdver3.md index 421a3d1b30ef..019e9291b1b5 100644 --- a/gcc/config/i386/bdver3.md +++ b/gcc/config/i386/bdver3.md @@ -16,19 +16,19 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. ;; -;; AMD bdver3 Scheduling +;; AMD bdver3 and bdver4 Scheduling ;; -;; The bdver3 contains three pipelined FP units and two integer units. -;; Fetching and decoding logic is different from previous fam15 processors. -;; Fetching is done every two cycles rather than every cycle and -;; two decode units are available. The decode units therefore decode +;; The bdver3 and bdver4 contain three pipelined FP units and two integer +;; units. Fetching and decoding logic is different from previous fam15 +;; processors. Fetching is done every two cycles rather than every cycle +;; and two decode units are available. The decode units therefore decode ;; four instructions in two cycles. ;; ;; The load/store queue unit is not attached to the schedulers but ;; communicates with all the execution units separately instead. ;; -;; bdver3 belong to fam15 processors. 
We use the same insn attribute -;; that was used for bdver1 decoding scheme. +;; bdver3 and bdver4 belong to fam15 processors. We use the same insn +;; attribute that was used for bdver1 decoding scheme. (define_automaton "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu") @@ -102,90 +102,90 @@ ;; Jump instructions are executed in the branch unit completely transparent to us. (define_insn_reservation "bdver3_call" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "call,callv")) "bdver3-double,(bdver3-agu | bdver3-ieu),nothing") ;; PUSH mem is double path. (define_insn_reservation "bdver3_push" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "push")) "bdver3-direct,bdver3-ieu,bdver3-store") ;; POP r16/mem are double path. (define_insn_reservation "bdver3_pop" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "pop")) "bdver3-direct,bdver3-ivector") ;; LEAVE no latency info so far, assume same with amdfam10. (define_insn_reservation "bdver3_leave" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "leave")) "bdver3-vector,bdver3-ivector") ;; LEA executes in AGU unit with 1 cycle latency on BDVER3. (define_insn_reservation "bdver3_lea" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "lea")) "bdver3-direct,bdver3-ieu") ;; MUL executes in special multiplier unit attached to IEU1. 
(define_insn_reservation "bdver3_imul_DI" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "none,unknown")))) "bdver3-direct,bdver3-ieu1") (define_insn_reservation "bdver3_imul" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (eq_attr "memory" "none,unknown"))) "bdver3-direct,bdver3-ieu1") (define_insn_reservation "bdver3_imul_mem_DI" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (and (eq_attr "mode" "DI") (eq_attr "memory" "load,both")))) "bdver3-direct,bdver3-load,bdver3-ieu1") (define_insn_reservation "bdver3_imul_mem" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imul") (eq_attr "memory" "load,both"))) "bdver3-direct,bdver3-load,bdver3-ieu1") (define_insn_reservation "bdver3_str" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "str") (eq_attr "memory" "load,both,store"))) "bdver3-vector,bdver3-load,bdver3-ivector") ;; Integer instructions. 
(define_insn_reservation "bdver3_idirect" 1 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "bdver3-direct,(bdver3-ieu|bdver3-agu)") (define_insn_reservation "bdver3_ivector" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "vector") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown")))) "bdver3-vector,bdver3-ivector") (define_insn_reservation "bdver3_idirect_loadmov" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-load") (define_insn_reservation "bdver3_idirect_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-load,bdver3-ieu") (define_insn_reservation "bdver3_idirect_movstore" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "imov") (eq_attr "memory" "store"))) "bdver3-direct,bdver3-ieu,bdver3-store") (define_insn_reservation "bdver3_idirect_both" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "both")))) @@ -193,7 +193,7 @@ bdver3-ieu,bdver3-store, bdver3-store") (define_insn_reservation "bdver3_idirect_store" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "direct") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "store")))) @@ -201,108 +201,108 @@ bdver3-store") ;; BDVER3 floating point units. 
(define_insn_reservation "bdver3_fldxf" 13 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "load") (eq_attr "mode" "XF")))) "bdver3-vector,bdver3-fpload2,bdver3-fvector*9") (define_insn_reservation "bdver3_fld" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fstxf" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (and (eq_attr "memory" "store,both") (eq_attr "mode" "XF")))) "bdver3-vector,(bdver3-fpsched+bdver3-agu),(bdver3-store2+(bdver3-fvector*6))") (define_insn_reservation "bdver3_fst" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))) "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)") (define_insn_reservation "bdver3_fist" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fistp,fisttp")) "bdver3-double,(bdver3-fpsched),(bdver3-fsto+bdver3-store)") (define_insn_reservation "bdver3_fmov_bdver3" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fmov")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fadd_load" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fop") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fadd" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fop")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fmul_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fmul") (eq_attr "memory" "load"))) "bdver3-double,bdver3-fpload,bdver3-ffma") 
(define_insn_reservation "bdver3_fmul" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fmul")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fsgn" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fsgn")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fdiv_load" 42 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fdiv") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fdiv" 42 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fdiv")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fpspc_load" 143 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fpspc") (eq_attr "memory" "load"))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_fcmov_load" 17 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmov") (eq_attr "memory" "load"))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_fcmov" 15 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fcmov")) "bdver3-vector,bdver3-fpsched,bdver3-fvector") (define_insn_reservation "bdver3_fcomi_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmp") (and (eq_attr "bdver1_decode" "double") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_fcomi" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "bdver1_decode" "double") (eq_attr "type" "fcmp"))) "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_fcom_load" 6 - (and (eq_attr "cpu" "bdver3") + (and 
(eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "fcmp") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_fcom" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fcmp")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_fxch" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "fxch")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") ;; SSE loads. (define_insn_reservation "bdver3_ssevector_avx128_unaligned_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "movu" "1") @@ -310,162 +310,162 @@ (eq_attr "memory" "load")))))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssevector_avx256_unaligned_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "movu" "1") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "load"))))) "bdver3-double,bdver3-fpload") (define_insn_reservation "bdver3_ssevector_sse128_unaligned_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "movu" "1") (and (eq_attr "mode" "V4SF,V2DF") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_avx128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssevector_avx256_load" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fmal") 
(define_insn_reservation "bdver3_ssevector_sse128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssescalar_movq_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "DI") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_ssescalar_vmovss_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "prefix" "vex") (and (eq_attr "mode" "SF") (eq_attr "memory" "load"))))) "bdver3-direct,bdver3-fpload") (define_insn_reservation "bdver3_ssescalar_sse128_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload, bdver3-ffma") (define_insn_reservation "bdver3_mmxsse_load" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload, bdver3-fmal") ;; SSE stores. 
(define_insn_reservation "bdver3_sse_store_avx256" 5 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "store,both")))) "bdver3-double,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)") (define_insn_reservation "bdver3_sse_store" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V4SF,V2DF,TI") (eq_attr "memory" "store,both")))) "bdver3-direct,bdver3-fpsched,((bdver3-fsto+bdver3-store)*2)") (define_insn_reservation "bdver3_mmxsse_store_short" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "store,both"))) "bdver3-direct,bdver3-fpsched,(bdver3-fsto+bdver3-store)") ;; Register moves. (define_insn_reservation "bdver3_ssevector_avx256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "V8SF,V4DF,OI") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,bdver3-fmal") (define_insn_reservation "bdver3_movss_movsd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemov") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_mmxssemov" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "mmxmov,ssemov") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmal") ;; SSE logs. 
(define_insn_reservation "bdver3_sselog_load_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_sselog_256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (eq_attr "mode" "V8SF"))) "bdver3-double,bdver3-fpsched,bdver3-fmal") (define_insn_reservation "bdver3_sselog_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sselog,sselog1") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fxbar") (define_insn_reservation "bdver3_sselog" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "sselog,sselog1")) "bdver3-direct,bdver3-fpsched,bdver3-fxbar") ;; SSE Shuffles (define_insn_reservation "bdver3_sseshuf_load_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf,sseshuf1") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf,sseshuf1") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf_256" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseshuf") (eq_attr "mode" "V8SF"))) "bdver3-double,bdver3-fpsched,bdver3-fpshuf") (define_insn_reservation "bdver3_sseshuf" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "sseshuf,sseshuf1")) "bdver3-direct,bdver3-fpsched,bdver3-fpshuf") ;; PCMP actually executes in FMAL. 
(define_insn_reservation "bdver3_ssecmp_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssecmp" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "ssecmp")) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_ssecomi_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load"))) "bdver3-double,bdver3-fpload,(bdver3-ffma | bdver3-fsto)") (define_insn_reservation "bdver3_ssecomi" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (eq_attr "type" "ssecomi")) "bdver3-double,bdver3-fpsched,(bdver3-ffma | bdver3-fsto)") @@ -474,7 +474,7 @@ ;; 256 bit conversion. (define_insn_reservation "bdver3_vcvtX2Y_avx256_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (ior (ior (match_operand:V4DF 0 "register_operand") @@ -485,7 +485,7 @@ (match_operand:V8SI 1 "nonimmediate_operand"))))))) "bdver3-vector,bdver3-fpload,bdver3-fvector") (define_insn_reservation "bdver3_vcvtX2Y_avx256" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (ior (ior (match_operand:V4DF 0 "register_operand") @@ -497,40 +497,40 @@ "bdver3-vector,bdver3-fpsched,bdver3-fvector") ;; CVTSS2SD, CVTSD2SS. 
(define_insn_reservation "bdver3_ssecvt_cvtss2sd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtss2sd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,bdver3-fcvt") ;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ. (define_insn_reservation "bdver3_sseicvt_cvtsi2sd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_sseicvt_cvtsi2sd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SF,DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(nothing | bdver3-fcvt)") ;; CVTPD2PS. (define_insn_reservation "bdver3_ssecvt_cvtpd2ps_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 0 "register_operand") (match_operand:V2DF 1 "nonimmediate_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtpd2ps" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 0 "register_operand") @@ -538,7 +538,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTPI2PS, CVTDQ2PS. 
(define_insn_reservation "bdver3_ssecvt_cvtdq2ps_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 0 "register_operand") @@ -546,7 +546,7 @@ (match_operand:V4SI 1 "nonimmediate_operand")))))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtdq2ps" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 0 "register_operand") @@ -555,14 +555,14 @@ "bdver3-direct,bdver3-fpsched,bdver3-fcvt") ;; CVTDQ2PD. (define_insn_reservation "bdver3_ssecvt_cvtdq2pd_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") (match_operand:V4SI 1 "nonimmediate_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtdq2pd" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 0 "register_operand") @@ -570,7 +570,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTPS2PD, CVTPI2PD. 
(define_insn_reservation "bdver3_ssecvt_cvtps2pd_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") @@ -578,7 +578,7 @@ (match_operand:V4SF 1 "nonimmediate_operand")))))) "bdver3-double,bdver3-fpload,(bdver3-fxbar | bdver3-fcvt)") (define_insn_reservation "bdver3_ssecvt_cvtps2pd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 0 "register_operand") @@ -587,27 +587,27 @@ "bdver3-double,bdver3-fpsched,(bdver3-fxbar | bdver3-fcvt)") ;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ. (define_insn_reservation "bdver3_ssecvt_cvtsX2si_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fsto)") (define_insn_reservation "bdver3_ssecvt_cvtsX2si" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI,DI") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fsto)") ;; CVTPD2PI, CVTTPD2PI. 
(define_insn_reservation "bdver3_ssecvt_cvtpd2pi_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 1 "nonimmediate_operand") (match_operand:V2SI 0 "register_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)") (define_insn_reservation "bdver3_ssecvt_cvtpd2pi" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 1 "nonimmediate_operand") @@ -615,14 +615,14 @@ "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)") ;; CVTPD2DQ, CVTTPD2DQ. (define_insn_reservation "bdver3_ssecvt_cvtpd2dq_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V2DF 1 "nonimmediate_operand") (match_operand:V4SI 0 "register_operand"))))) "bdver3-double,bdver3-fpload,(bdver3-fcvt | bdver3-fxbar)") (define_insn_reservation "bdver3_ssecvt_cvtpd2dq" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V2DF 1 "nonimmediate_operand") @@ -630,7 +630,7 @@ "bdver3-double,bdver3-fpsched,(bdver3-fcvt | bdver3-fxbar)") ;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ. 
(define_insn_reservation "bdver3_ssecvt_cvtps2pi_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "load") (and (match_operand:V4SF 1 "nonimmediate_operand") @@ -638,7 +638,7 @@ (match_operand: V4SI 0 "register_operand")))))) "bdver3-direct,bdver3-fpload,bdver3-fcvt") (define_insn_reservation "bdver3_ssecvt_cvtps2pi" 4 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssecvt") (and (eq_attr "memory" "none") (and (match_operand:V4SF 1 "nonimmediate_operand") @@ -648,100 +648,100 @@ ;; SSE MUL, ADD, and MULADD. (define_insn_reservation "bdver3_ssemuladd_load_256" 11 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd_256" 7 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (and (eq_attr "mode" "V8SF,V4DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd_load" 10 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-ffma") (define_insn_reservation "bdver3_ssemuladd" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-ffma") (define_insn_reservation "bdver3_sseimul_load" 8 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fmma") (define_insn_reservation "bdver3_sseimul" 4 - (and (eq_attr "cpu" 
"bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseimul") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmma") (define_insn_reservation "bdver3_sseiadd_load" 6 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseiadd") (eq_attr "memory" "load"))) "bdver3-direct,bdver3-fpload,bdver3-fmal") (define_insn_reservation "bdver3_sseiadd" 2 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseiadd") (eq_attr "memory" "none"))) "bdver3-direct,bdver3-fpsched,bdver3-fmal") ;; SSE DIV: no throughput information (assume same as amdfam10). (define_insn_reservation "bdver3_ssediv_double_load_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V4DF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V4DF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_load_256" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "load")))) "bdver3-double,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_256" 24 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "V8SF") (eq_attr "memory" "none")))) "bdver3-double,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double_load" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "DF,V2DF") (eq_attr 
"memory" "load")))) "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_double" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "DF,V2DF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single_load" 27 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,V4SF") (eq_attr "memory" "load")))) "bdver3-direct,bdver3-fpload,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_ssediv_single" 24 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "ssediv") (and (eq_attr "mode" "SF,V4SF") (eq_attr "memory" "none")))) "bdver3-direct,bdver3-fpsched,(bdver3-ffma0*17 | bdver3-ffma1*17)") (define_insn_reservation "bdver3_sseins" 3 - (and (eq_attr "cpu" "bdver3") + (and (eq_attr "cpu" "bdver3,bdver4") (and (eq_attr "type" "sseins") (eq_attr "mode" "TI"))) "bdver3-direct,bdver3-fpsched,bdver3-fxbar") diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 823f92da8c33..a4a1f40548a2 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -550,6 +550,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) processor = PROCESSOR_GEODE; + /* bdver4 also has MOVBE, so test AVX2 before the btver2 check. */ + else if (has_avx2) + processor = PROCESSOR_BDVER4; else if (has_movbe) processor = PROCESSOR_BTVER2; else if (has_xsaveopt) processor = PROCESSOR_BDVER3; else if (has_bmi) @@ -772,6 +775,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) case PROCESSOR_BDVER3: cpu = "bdver3"; break; + case PROCESSOR_BDVER4: + cpu = "bdver4"; + break; case PROCESSOR_BTVER1: cpu = "btver1"; break; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 1c053b1e51eb..18c2929d10f0 100644 --- a/gcc/config/i386/i386-c.c +++
b/gcc/config/i386/i386-c.c @@ -117,6 +117,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__bdver3"); def_or_undef (parse_in, "__bdver3__"); break; + case PROCESSOR_BDVER4: + def_or_undef (parse_in, "__bdver4"); + def_or_undef (parse_in, "__bdver4__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__btver1"); def_or_undef (parse_in, "__btver1__"); @@ -224,6 +228,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_BDVER3: def_or_undef (parse_in, "__tune_bdver3__"); break; + case PROCESSOR_BDVER4: + def_or_undef (parse_in, "__tune_bdver4__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__tune_btver1__"); break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 565d8fa6ae65..bb6d15a2b2ac 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1161,6 +1161,92 @@ struct processor_costs bdver3_cost = { 1, /* cond_not_taken_branch_cost. */ }; +/* BDVER4 has an optimized REP instruction for medium-sized blocks, but for + very small blocks it is better to use a loop. For large blocks, libcall + can do non-temporal accesses and beat inline considerably.
*/ +static stringop_algs bdver4_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver4_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs bdver4_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + bdver4_memcpy, + bdver4_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 2, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* BTVER1 has optimized REP instruction for medium sized blocks, but for very small blocks it is better to use loop. 
For large blocks, libcall can do nontemporary accesses and beat inline considerably. */ @@ -1850,9 +1936,10 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_BDVER1 (1<