git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386.md: Comments on fma4 instruction selection reflect requirement on register pressure based cost model.
author Uros Bizjak <uros@gcc.gnu.org>
Wed, 12 Sep 2012 15:23:01 +0000 (17:23 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Wed, 12 Sep 2012 15:23:01 +0000 (17:23 +0200)
2012-09-12  Ganesh Gopalasubramanian  <Ganesh.Gopalasubramanian@amd.com>

* config/i386/i386.md : Comments on fma4 instruction
selection reflect requirement on register pressure based
cost model.

* config/i386/driver-i386.c (host_detect_local_cpu): fma4
flag is set-reset as informed by the cpuid flag.

* config/i386/i386.c (processor_alias_table): fma4
flag is enabled for bdver2.

From-SVN: r191226

gcc/ChangeLog
gcc/config/i386/driver-i386.c
gcc/config/i386/i386.c
gcc/config/i386/i386.md

index fe066e7aa6a34a6714568793185b101754f9a4fe..345ea6a6287b6580df11d1eeeebf026fbe24850c 100644 (file)
@@ -1,3 +1,15 @@
+2012-09-12  Ganesh Gopalasubramanian  <Ganesh.Gopalasubramanian@amd.com>
+
+       * config/i386/i386.md : Comments on fma4 instruction
+       selection reflect requirement on register pressure based
+       cost model.
+
+       * config/i386/driver-i386.c (host_detect_local_cpu): fma4
+       flag is set-reset as informed by the cpuid flag.
+
+       * config/i386/i386.c (processor_alias_table): fma4
+       flag is enabled for bdver2.
+
 2012-09-12  Richard Guenther  <rguenther@suse.de>
 
        PR tree-optimization/54489
 
 2012-09-11  Diego Novillo  <dnovillo@google.com>
 
-       * var-tracking.c (vt_add_function_parameter): Adjust for VEC
-       changes.
+       * var-tracking.c (vt_add_function_parameter): Adjust for VEC changes.
 
 2012-09-11  Dominique Dhumieres  <dominiq@lps.ens.fr>
 
-       * config/darwin.c (darwin_asm_named_section): Adjust for
-       VEC changes.
+       * config/darwin.c (darwin_asm_named_section): Adjust for VEC changes.
        (darwin_asm_dwarf_section): Likewise.
 
 2012-09-11  Martin Jambor  <mjambor@suse.cz>
 
 2012-09-11  Richard Guenther  <rguenther@suse.de>
 
-       * graphite-scop-detection.c (move_sd_regions): Adjust for VEC
-       changes.
+       * graphite-scop-detection.c (move_sd_regions): Adjust for VEC changes.
        (scopdet_basic_block_info): Likewise.
        (build_scops_1): Likewise.
        (limit_scops): Likewise.
+
 2012-09-11  Richard Guenther  <rguenther@suse.de>
 
        PR middle-end/54515
 2012-09-09  Mark Kettenis  <kettenis@gnu.org>
 
        * config/openbsd-stdint.h (INTMAX_TYPE, UINTMAX_TYPE): Define.
-                       
+
 2012-09-09  Jan Hubicka  <jh@suse.cz>
 
        * passes.c (ipa_write_summaries_1): Set state;
        (lto_symtab_encoder_delete_node): New function.
        (lto_symtab_encoder_encode_body_p, lto_set_symtab_encoder_encode_body,
        lto_symtab_encoder_encode_initializer_p,
-       lto_set_symtab_encoder_encode_initializer, lto_symtab_encoder_in_partition_p,
+       lto_set_symtab_encoder_encode_initializer,
+       lto_symtab_encoder_in_partition_p,
        lto_symtab_encoder_in_partition_p): Update.
        (compute_ltrans_boundary): Take encoder as an input.
        * passes.c (ipa_write_summaries_1): Update.
 
 2012-09-08  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
 
-       * config/pa/pa.c (hppa_rtx_costs): Update costs for large integer modes.
+       * config/pa/pa.c (hppa_rtx_costs): Update costs for large
+       integer modes.
 
 2012-09-08  Andi Kleen  <ak@linux.intel.com>
 
-       * gcc/lto/lto.c (do_whole_program_analysis): 
-       Fix last broken patch
+       * gcc/lto/lto.c (do_whole_program_analysis): Fix last broken patch.
 
 2012-09-08  Andi Kleen  <ak@linux.intel.com>
 
        PR tree-optimization/53986
        * tree-vrp.c (extract_range_from_multiplicative_op_1): Allow
        LSHIFT_EXPR.
-       (extract_range_from_binary_expr_1): Handle LSHIFT with constant range as
-       shift amount.
+       (extract_range_from_binary_expr_1): Handle LSHIFT with constant
+       range as shift amount.
 
 2012-09-07  Segher Boessenkool  <segher@kernel.crashing.org>
 
        (call_value_nonlocal_aix32): Ditto.
        (call_value_nonlocal_aix64): Ditto.
 
-2012-09-06  Andi Kleen <ak@linux.intel.com>
+2012-09-06  Andi Kleen  <ak@linux.intel.com>
 
        * doc/invoke.texi (-ffat-lto-objects): Clarify that gcc-ar
        et.al. should be used.
 
 2012-09-06  Uros Bizjak  <ubizjak@gmail.com>
 
-       * configure.ac (hle prefixes): Remove .code64.
+       * configure.ac (hle prefixes): Remove .code64 directive.
        * configure: Regenerated.
 
 2012-09-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
        * config/sh/sh.md (cbranchsi4): Remove TARGET_CBRANCHDI4 check and
        always invoke expand_cbranchsi4.
 
-2012-09-03  Andi Kleen <ak@linux.intel.com>
+2012-09-03  Andi Kleen  <ak@linux.intel.com>
 
        * tree-ssa-sccvn.c (vn_reference_fold_indirect): Initialize
        addr_offset always.
        * cgraphunit.c (cgraph_analyze_function): Use gimple_has_body_p.
 
 2012-05-02  Kirill Yukhin  <kirill.yukhin@intel.com>
-           Andi Kleen <ak@linux.intel.com>
+           Andi Kleen  <ak@linux.intel.com>
 
        * coretypes.h (MEMMODEL_MASK): New.
        * builtins.c (get_memmodel): Add val. Call target.memmodel_check
index 79bf75ffaeb7b3a812c5216fb7c0f79eb5134e71..bda4e0222776f924078236c78d2d4d4e37e7f2c1 100644 (file)
@@ -472,8 +472,6 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_abm = ecx & bit_ABM;
       has_lwp = ecx & bit_LWP;
       has_fma4 = ecx & bit_FMA4;
-      if (vendor == signature_AMD_ebx && has_fma4 && has_fma)
-       has_fma4 = 0;
       has_xop = ecx & bit_XOP;
       has_tbm = ecx & bit_TBM;
       has_lzcnt = ecx & bit_LZCNT;
index 62d3a8c990b62e0b3746c230a1993cb9cf038706..69a3377e15010b04fca7762423059c2cb3b76a5a 100644 (file)
@@ -3164,7 +3164,7 @@ ix86_option_override_internal (bool main_args_p)
       {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
        | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
        | PTA_FMA},
       {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
index 898e0156248859db9e289fe614adb4642c92cf04..05d22ddb3dc0cf0dc819105ebf0ef7ea83c2e3cc 100644 (file)
         (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
         (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
         (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
-        ;; Disable generation of FMA4 instructions for generic code
-        ;; since FMA3 is preferred for targets that implement both
-        ;; instruction sets.
+        ;; Fma instruction selection has to be done based on
+        ;; register pressure. For generating fma4, a cost model
+        ;; based on register pressure is required. Till then,
+        ;; fma4 instruction is disabled for targets that implement
+        ;; both fma and fma4 instruction sets.
         (eq_attr "isa" "fma4")
           (symbol_ref "TARGET_FMA4 && !TARGET_FMA")
        ]