]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
target/119010 - more DFmode handling in zn4zn5 reservations
author: Richard Biener <rguenther@suse.de>
Thu, 27 Mar 2025 07:21:10 +0000 (08:21 +0100)
committer: Richard Biener <rguenth@gcc.gnu.org>
Thu, 27 Mar 2025 13:31:55 +0000 (14:31 +0100)
The following adds DFmode wherever V1DFmode and SFmode were already handled.
This resolves missing reservations for adds, subs [with memory]
and for FMAs in the testcase I'm looking at.  Resolved cases are:

-;;      16--> b  0: i 237 xmm3=xmm3+[r9*0x8+si]                   :nothing
-;;      29--> b  0: i 246 xmm3=xmm3+xmm1                          :nothing
-;;      46--> b  0: i 296 xmm1=xmm1-xmm3                          :nothing

I've done a search-and-replace for this; the cases it caught look reasonable,
though I'm of course not sure all of them can actually happen.

This also corrects the type matched by the znver{4,5}_sse_muladd_load
reservations, changing it from sseshuf to ssemuladd, which resolves:

-;;       1--> b  0: i 161 xmm0={-xmm0*xmm27+[cx+ax]}              :nothing
-;;      22--> b  0: i 229 xmm11={-xmm11*xmm7+[di*0x8+dx]}         :nothing

PR target/119010
* config/i386/zn4zn5.md (znver4_sse_add, znver4_sse_add_load,
znver5_sse_add_load, znver4_sse_add1, znver4_sse_add1_load,
znver5_sse_add1_load, znver4_sse_mul, znver4_sse_mul_load,
znver5_sse_mul_load, znver4_sse_cvt, znver4_sse_cvt_load,
znver5_sse_cvt_load, znver4_sse_shuf, znver5_sse_shuf,
znver4_sse_shuf_load, znver5_sse_shuf_load,
znver4_sse_cmp_avx128, znver5_sse_cmp_avx128,
znver4_sse_cmp_avx128_load, znver5_sse_cmp_avx128_load):
Also handle DFmode.
(znver4_sse_muladd_load, znver5_sse_muladd_load): Use
ssemuladd type.

gcc/config/i386/zn4zn5.md

index 954cdc528d6601a3edb91c708b5920e79290762d..5a3960e9d010f542872c3f6342f445d99df70c12 100644 (file)
 (define_insn_reservation "znver4_sse_add" 3
                         (and (eq_attr "cpu" "znver4,znver5")
                              (and (eq_attr "type" "sseadd")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "none"))))
                         "znver4-direct,znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver4_sse_add_load" 8
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "sseadd")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver5_sse_add_load" 8
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "sseadd")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver5-load,znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver4_sse_add1" 4
                         (and (eq_attr "cpu" "znver4,znver5")
                              (and (eq_attr "type" "sseadd1")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "none"))))
                         "znver4-vector,znver4-fvector*2")
 
 (define_insn_reservation "znver4_sse_add1_load" 9
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "sseadd1")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-vector,znver4-load,znver4-fvector*2")
 
 (define_insn_reservation "znver5_sse_add1_load" 9
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "sseadd1")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-vector,znver5-load,znver4-fvector*2")
 
 (define_insn_reservation "znver4_sse_mul" 3
                         (and (eq_attr "cpu" "znver4,znver5")
                              (and (eq_attr "type" "ssemul")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "none"))))
                         "znver4-direct,znver4-fpu0|znver4-fpu1")
 
 (define_insn_reservation "znver4_sse_mul_load" 8
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "ssemul")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver4-load,znver4-fpu0|znver4-fpu1")
 
 (define_insn_reservation "znver5_sse_mul_load" 8
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "ssemul")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1")
 
 (define_insn_reservation "znver4_sse_cvt" 3
                         (and (eq_attr "cpu" "znver4,znver5")
                              (and (eq_attr "type" "ssecvt")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "none"))))
                         "znver4-direct,znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver4_sse_cvt_load" 8
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "ssecvt")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver4-load,znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver5_sse_cvt_load" 8
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "ssecvt")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver5-load,znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver4_sse_shuf" 1
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "sseshuf")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "none"))))
                         "znver4-direct,znver4-fpu1|znver4-fpu2")
 
 (define_insn_reservation "znver5_sse_shuf" 1
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "sseshuf")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "none"))))
                         "znver4-direct,znver4-fpu1|znver4-fpu2|znver4-fpu3")
 
 (define_insn_reservation "znver4_sse_shuf_load" 6
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "sseshuf")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver4-load,znver4-fpu")
 
 (define_insn_reservation "znver5_sse_shuf_load" 6
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "sseshuf")
-                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (eq_attr "memory" "load"))))
                         "znver4-direct,znver5-load,znver4-fpu")
 
 (define_insn_reservation "znver4_sse_cmp_avx128" 3
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "ssecmp")
-                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (and (eq_attr "prefix" "evex")
                                         (eq_attr "memory" "none")))))
                         "znver4-direct,znver4-fpu0*2|znver4-fpu1*2")
 (define_insn_reservation "znver5_sse_cmp_avx128" 3
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "ssecmp")
-                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (and (eq_attr "prefix" "evex")
                                         (eq_attr "memory" "none")))))
                         "znver4-direct,znver4-fpu1|znver4-fpu2")
 (define_insn_reservation "znver4_sse_cmp_avx128_load" 9
                         (and (eq_attr "cpu" "znver4")
                              (and (eq_attr "type" "ssecmp")
-                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (and (eq_attr "prefix" "evex")
                                         (eq_attr "memory" "load")))))
                         "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2")
 (define_insn_reservation "znver5_sse_cmp_avx128_load" 9
                         (and (eq_attr "cpu" "znver5")
                              (and (eq_attr "type" "ssecmp")
-                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,SF")
+                                  (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF")
                                    (and (eq_attr "prefix" "evex")
                                         (eq_attr "memory" "load")))))
                         "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2")
 
 (define_insn_reservation "znver4_sse_muladd_load" 10
                         (and (eq_attr "cpu" "znver4")
-                             (and (eq_attr "type" "sseshuf")
+                             (and (eq_attr "type" "ssemuladd")
                                   (eq_attr "memory" "load")))
                         "znver4-direct,znver4-load,znver4-fpu0*2|znver4-fpu1*2")
 
 (define_insn_reservation "znver5_sse_muladd_load" 10
                         (and (eq_attr "cpu" "znver5")
-                             (and (eq_attr "type" "sseshuf")
+                             (and (eq_attr "type" "ssemuladd")
                                   (eq_attr "memory" "load")))
                         "znver4-direct,znver5-load,znver4-fpu1|znver4-fpu2")