git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: correct x87&SSE multiplication modeling in znver.md
author Alexander Monakov <amonakov@ispras.ru>
Tue, 1 Nov 2022 14:53:13 +0000 (17:53 +0300)
committer Alexander Monakov <amonakov@ispras.ru>
Wed, 16 Nov 2022 13:41:39 +0000 (16:41 +0300)
All multiplication instructions are fully pipelined, except AVX256
instructions on Zen 1, which issue over two cycles on a 128-bit unit.
Correct the model accordingly to reduce combinatorial explosion in
automaton tables.

Top znver table sizes in insn-automata.o:

Before:

30056 r znver1_fp_min_issue_delay
120224 r znver1_fp_transitions

After:

6720 r znver1_fp_min_issue_delay
53760 r znver1_fp_transitions

gcc/ChangeLog:

PR target/87832
* config/i386/znver.md (znver1_fp_op_mul): Correct cycles in
the reservation.
(znver1_fp_op_mul_load): Ditto.
(znver1_mmx_mul): Ditto.
(znver1_mmx_load): Ditto.
(znver1_ssemul_ss_ps): Ditto.
(znver1_ssemul_ss_ps_load): Ditto.
(znver1_ssemul_avx256_ps): Ditto.
(znver1_ssemul_avx256_ps_load): Ditto.
(znver1_ssemul_sd_pd): Ditto.
(znver1_ssemul_sd_pd_load): Ditto.
(znver2_ssemul_sd_pd): Ditto.
(znver2_ssemul_sd_pd_load): Ditto.
(znver1_ssemul_avx256_pd): Ditto.
(znver1_ssemul_avx256_pd_load): Ditto.
(znver1_sseimul): Ditto.
(znver1_sseimul_avx256): Ditto.
(znver1_sseimul_load): Ditto.
(znver1_sseimul_avx256_load): Ditto.
(znver1_sseimul_di): Ditto.
(znver1_sseimul_load_di): Ditto.

gcc/config/i386/znver.md

index c52f8b532ecbaea6db318a642d4b64ab3d8c5d4d..882f250f1b623d9490a2ad38e52d8c616457c380 100644 (file)
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "fop,fmul")
                                   (eq_attr "memory" "none")))
-                        "znver1-direct,znver1-fp0*5")
+                        "znver1-direct,znver1-fp0")
 
 (define_insn_reservation "znver1_fp_op_mul_load" 12 
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "fop,fmul")
                                   (eq_attr "memory" "load")))
-                        "znver1-direct,znver1-load,znver1-fp0*5")
+                        "znver1-direct,znver1-load,znver1-fp0")
 
 (define_insn_reservation "znver1_fp_op_imul_load" 16
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxmul")
                                   (eq_attr "memory" "none")))
-                         "znver1-direct,znver1-fp0*3")
+                         "znver1-direct,znver1-fp0")
 
 (define_insn_reservation "znver1_mmx_load" 10
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "type" "mmxmul")
                                   (eq_attr "memory" "load")))
-                        "znver1-direct,znver1-load,znver1-fp0*3")
+                        "znver1-direct,znver1-load,znver1-fp0")
 
 ;; TODO
 (define_insn_reservation "znver1_avx256_log" 1
                                              (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
                              (and (eq_attr "type" "ssemul")
                                   (eq_attr "memory" "none")))
-                        "znver1-direct,(znver1-fp0|znver1-fp1)*3")
+                        "znver1-direct,znver1-fp0|znver1-fp1")
 
 (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 
                         (and (ior (and (eq_attr "cpu" "znver1")
                                              (eq_attr "mode" "V8SF,V4SF,SF")))
                              (and (eq_attr "type" "ssemul")
                                   (eq_attr "memory" "load")))
-                        "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+                        "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
 
 (define_insn_reservation "znver1_ssemul_avx256_ps" 3
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V8SF")
                                   (and (eq_attr "type" "ssemul")
                                        (eq_attr "memory" "none"))))
-                        "znver1-double,(znver1-fp0|znver1-fp1)*3")
+                        "znver1-double,znver1-fp0*2|znver1-fp1*2")
 
 (define_insn_reservation "znver1_ssemul_avx256_ps_load" 10
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V8SF")
                                   (and (eq_attr "type" "ssemul")
                                        (eq_attr "memory" "load"))))
-                        "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*3")
+                        "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2")
 
 (define_insn_reservation "znver1_ssemul_sd_pd" 4
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V2DF,DF")
                                   (and (eq_attr "type" "ssemul")
                                        (eq_attr "memory" "none"))))
-                        "znver1-direct,(znver1-fp0|znver1-fp1)*4")
+                        "znver1-direct,znver1-fp0|znver1-fp1")
 
 (define_insn_reservation "znver1_ssemul_sd_pd_load" 11
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V2DF,DF")
                                   (and (eq_attr "type" "ssemul")
                                        (eq_attr "memory" "load"))))
-                        "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4")
+                        "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
 
 (define_insn_reservation "znver2_ssemul_sd_pd" 3
                         (and (eq_attr "cpu" "znver2,znver3")
                              (and (eq_attr "type" "ssemul")
                                   (eq_attr "memory" "none")))
-                        "znver1-direct,(znver1-fp0|znver1-fp1)*3")
+                        "znver1-direct,znver1-fp0|znver1-fp1")
 
 (define_insn_reservation "znver2_ssemul_sd_pd_load" 10
                         (and (eq_attr "cpu" "znver2,znver3")
                              (and (eq_attr "type" "ssemul")
                                   (eq_attr "memory" "load")))
-                        "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+                        "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
 
 
 (define_insn_reservation "znver1_ssemul_avx256_pd" 5
                              (and (eq_attr "mode" "V4DF")
                                   (and (eq_attr "type" "ssemul")
                                        (eq_attr "memory" "none"))))
-                        "znver1-double,(znver1-fp0|znver1-fp1)*4")
+                        "znver1-double,znver1-fp0*2|znver1-fp1*2")
 
 (define_insn_reservation "znver1_ssemul_avx256_pd_load" 12
                         (and (eq_attr "cpu" "znver1")
                              (and (eq_attr "mode" "V4DF")
                                   (and (eq_attr "type" "ssemul")
                                        (eq_attr "memory" "load"))))
-                        "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*4")
+                        "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2")
 
 ;;SSE imul
 (define_insn_reservation "znver1_sseimul" 3
                                              (eq_attr "mode" "TI,OI")))
                              (and (eq_attr "type" "sseimul")
                                   (eq_attr "memory" "none")))
-                        "znver1-direct,znver1-fp0*3")
+                        "znver1-direct,znver1-fp0")
 
 (define_insn_reservation "znver1_sseimul_avx256" 4
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "mode" "OI")
                                   (and (eq_attr "type" "sseimul")
                                        (eq_attr "memory" "none"))))
-                        "znver1-double,znver1-fp0*4")
+                        "znver1-double,znver1-fp0*2")
 
 (define_insn_reservation "znver1_sseimul_load" 10
                         (and (ior (and (eq_attr "cpu" "znver1")
                                        (eq_attr "mode" "TI,OI")))
                              (and (eq_attr "type" "sseimul")
                                   (eq_attr "memory" "load")))
-                        "znver1-direct,znver1-load,znver1-fp0*3")
+                        "znver1-direct,znver1-load,znver1-fp0")
 
 (define_insn_reservation "znver1_sseimul_avx256_load" 11
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "mode" "OI")
                                   (and (eq_attr "type" "sseimul")
                                        (eq_attr "memory" "load"))))
-                        "znver1-double,znver1-load,znver1-fp0*4")
+                        "znver1-double,znver1-load,znver1-fp0*2")
 
 (define_insn_reservation "znver1_sseimul_di" 3 
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "mode" "DI")
                                   (and (eq_attr "memory" "none")
                                        (eq_attr "type" "sseimul"))))
-                        "znver1-direct,znver1-fp0*3")
+                        "znver1-direct,znver1-fp0")
 
 (define_insn_reservation "znver1_sseimul_load_di" 10 
                         (and (eq_attr "cpu" "znver1,znver2,znver3")
                              (and (eq_attr "mode" "DI")
                                   (and (eq_attr "type" "sseimul")
                                        (eq_attr "memory" "load"))))
-                        "znver1-direct,znver1-load,znver1-fp0*3")
+                        "znver1-direct,znver1-load,znver1-fp0")
 
 ;; SSE compares
 (define_insn_reservation "znver1_sse_cmp" 1