]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556]
author Peter Bergner <bergner@linux.ibm.com>
Wed, 18 May 2022 02:09:29 +0000 (21:09 -0500)
committer Peter Bergner <bergner@linux.ibm.com>
Wed, 18 May 2022 02:10:27 +0000 (21:10 -0500)
When optimizing the DGEMM kernel in OpenBLAS to use MMA, the MMA code
uses all 8 accumulators, which overlap all of the vs0-vs31 vector registers.
Current trunk assigns one of the normal vector inputs of an MMA
instruction to a register in vs0-vs31, which forces us to spill one of
the accumulators to memory, leading to poor performance.  The solution
here is to replace the "wa" constraints for the vector input operands in
the MMA instruction patterns with "v,?wa" so that we prefer using the
altivec registers vs32-vs63 over the vs0-vs31 registers, while still
allowing vs0-vs31 as a fallback alternative.

2022-05-17  Peter Bergner  <bergner@linux.ibm.com>
    Segher Boessenkool  <segher@kernel.crashing.org>

gcc/
PR target/105556
* config/rs6000/mma.md (mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>,
mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>,
mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>,
mma_<vvi4i4i4>, mma_<avvi4i4i4>): Replace "wa" constraints with "v,?wa".
Update other operands accordingly.

gcc/config/rs6000/mma.md

index 907c9d6d516fd9da14782125fe04480ce35e5b6b..a183b6a168afbe2ff4a89bec1d2e332abe82e2ec 100644 (file)
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:OO 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:OO 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "u8bit_cint_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
                    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")
-                   (match_operand:SI 6 "u8bit_cint_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
                    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
                    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")
-                   (match_operand:SI 6 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
                    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
                    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
                    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
                    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:OO 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_3_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:OO 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
                    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:SI 3 "const_0_to_15_operand" "n")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 3 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
                    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
    (set_attr "prefixed" "yes")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
-       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
-                   (match_operand:V16QI 2 "vsx_register_operand" "wa")
-                   (match_operand:V16QI 3 "vsx_register_operand" "wa")
-                   (match_operand:SI 4 "const_0_to_15_operand" "n")
-                   (match_operand:SI 5 "const_0_to_15_operand" "n")
-                   (match_operand:SI 6 "const_0_to_15_operand" "n")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d")
+       (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0")
+                   (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
+                   (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
+                   (match_operand:SI 4 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 5 "const_0_to_15_operand" "n,n")
+                   (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
                    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"