]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR middle-end/54400 (recognize vector reductions)
authorMarc Glisse <marc.glisse@inria.fr>
Mon, 8 Oct 2012 20:45:56 +0000 (22:45 +0200)
committerMarc Glisse <glisse@gcc.gnu.org>
Mon, 8 Oct 2012 20:45:56 +0000 (20:45 +0000)
2012-10-08  Marc Glisse  <marc.glisse@inria.fr>

gcc/
PR target/54400
* config/i386/i386.md (type attribute): Add sseadd1.
(unit attribute): Add support for sseadd1.
(memory attribute): Likewise.
* config/i386/athlon.md: Likewise.
* config/i386/core2.md: Likewise.
* config/i386/atom.md: Likewise.
* config/i386/ppro.md: Likewise.
* config/i386/bdver1.md: Likewise.
* config/i386/sse.md (sse3_h<plusminus_insn>v2df3): Split into...
(sse3_haddv2df3): ... expander.
(*sse3_haddv2df3): ... define_insn. Accept permuted operands.
(sse3_hsubv2df3): ... define_insn.
(*sse3_haddv2df3_low): New define_insn.
(*sse3_hsubv2df3_low): New define_insn.

gcc/testsuite/
PR target/54400
* gcc.target/i386/pr54400.c: New testcase.

From-SVN: r192223

gcc/ChangeLog
gcc/config/i386/athlon.md
gcc/config/i386/atom.md
gcc/config/i386/bdver1.md
gcc/config/i386/core2.md
gcc/config/i386/i386.md
gcc/config/i386/ppro.md
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr54400.c [new file with mode: 0644]

index ba74c1bbc1f47509fed5068cf4d1c784d000c00c..afa36d19c8f28bd94e08c140ea6b1212270154e0 100644 (file)
@@ -1,3 +1,21 @@
+2012-10-08  Marc Glisse  <marc.glisse@inria.fr>
+
+       PR target/54400
+       * config/i386/i386.md (type attribute): Add sseadd1.
+       (unit attribute): Add support for sseadd1.
+       (memory attribute): Likewise.
+       * config/i386/athlon.md: Likewise.
+       * config/i386/core2.md: Likewise.
+       * config/i386/atom.md: Likewise.
+       * config/i386/ppro.md: Likewise.
+       * config/i386/bdver1.md: Likewise.
+       * config/i386/sse.md (sse3_h<plusminus_insn>v2df3): Split into...
+       (sse3_haddv2df3): ... expander.
+       (*sse3_haddv2df3): ... define_insn. Accept permuted operands.
+       (sse3_hsubv2df3): ... define_insn.
+       (*sse3_haddv2df3_low): New define_insn.
+       (*sse3_hsubv2df3_low): New define_insn.
+
 2012-10-08  Jan Hubicka  <jh@suse.cz>
 
        * loop-unswitch.c (unswitch_single_loop): Use
index 401cb0daf766013dee6e8d0546fedb3a8d77104e..1a2d607150bef0510ac4eca371228df5d84b180a 100644 (file)
                         "athlon-direct,athlon-fpsched,athlon-fadd")
 (define_insn_reservation "athlon_sseadd_load" 4
                         (and (eq_attr "cpu" "athlon")
-                             (and (eq_attr "type" "sseadd")
+                             (and (eq_attr "type" "sseadd,sseadd1")
                                   (and (eq_attr "mode" "SF,DF,DI")
                                        (eq_attr "memory" "load"))))
                         "athlon-direct,athlon-fpload,athlon-fadd")
 (define_insn_reservation "athlon_sseadd_load_k8" 6
                         (and (eq_attr "cpu" "k8,generic64,amdfam10")
-                             (and (eq_attr "type" "sseadd")
+                             (and (eq_attr "type" "sseadd,sseadd1")
                                   (and (eq_attr "mode" "SF,DF,DI")
                                        (eq_attr "memory" "load"))))
                         "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_sseadd" 4
                         (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
-                             (and (eq_attr "type" "sseadd")
+                             (and (eq_attr "type" "sseadd,sseadd1")
                                   (eq_attr "mode" "SF,DF,DI")))
                         "athlon-direct,athlon-fpsched,athlon-fadd")
 (define_insn_reservation "athlon_sseaddvector_load" 5
                         (and (eq_attr "cpu" "athlon")
-                             (and (eq_attr "type" "sseadd")
+                             (and (eq_attr "type" "sseadd,sseadd1")
                                   (eq_attr "memory" "load")))
                         "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
 (define_insn_reservation "athlon_sseaddvector_load_k8" 7
                         (and (eq_attr "cpu" "k8,generic64")
-                             (and (eq_attr "type" "sseadd")
+                             (and (eq_attr "type" "sseadd,sseadd1")
                                   (eq_attr "memory" "load")))
                         "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
 (define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
                         (and (eq_attr "cpu" "amdfam10")
-                             (and (eq_attr "type" "sseadd")
+                             (and (eq_attr "type" "sseadd,sseadd1")
                                   (eq_attr "memory" "load")))
                         "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_sseaddvector" 5
                         (and (eq_attr "cpu" "athlon")
-                             (eq_attr "type" "sseadd"))
+                             (eq_attr "type" "sseadd,sseadd1"))
                         "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
 (define_insn_reservation "athlon_sseaddvector_k8" 5
                         (and (eq_attr "cpu" "k8,generic64")
-                             (eq_attr "type" "sseadd"))
+                             (eq_attr "type" "sseadd,sseadd1"))
                         "athlon-double,athlon-fpsched,(athlon-fadd*2)")
 (define_insn_reservation "athlon_sseaddvector_amdfam10" 4
                         (and (eq_attr "cpu" "amdfam10")
-                             (eq_attr "type" "sseadd"))
+                             (eq_attr "type" "sseadd,sseadd1"))
                         "athlon-direct,athlon-fpsched,athlon-fadd")
 
 ;; Conversions behaves very irregularly and the scheduling is critical here.
index 3c2b9575857071ba6a03be3c675c249af53277ca..f24fd5ca824957d0d0aaa63bbc079d9d857bdb6c 100644 (file)
 ;; no memory simple
 (define_insn_reservation  "atom_sseadd" 5
   (and (eq_attr "cpu" "atom")
-       (and (eq_attr "type" "sseadd")
+       (and (eq_attr "type" "sseadd,sseadd1")
             (and (eq_attr "memory" "none")
                  (and (eq_attr "mode" "!V2DF")
                       (eq_attr "atom_unit" "!complex")))))
 ;; memory simple
 (define_insn_reservation  "atom_sseadd_mem" 5
   (and (eq_attr "cpu" "atom")
-       (and (eq_attr "type" "sseadd")
+       (and (eq_attr "type" "sseadd,sseadd1")
             (and (eq_attr "memory" "!none")
                  (and (eq_attr "mode" "!V2DF")
                       (eq_attr "atom_unit" "!complex")))))
 ;; maxps, minps, *pd, hadd, hsub
 (define_insn_reservation  "atom_sseadd_3" 8
   (and (eq_attr "cpu" "atom")
-       (and (eq_attr "type" "sseadd")
+       (and (eq_attr "type" "sseadd,sseadd1")
             (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
   "atom-complex, atom-all-eu*7")
 
index 10f95ffa75eaeaa2649d661ee37268d64950a6ed..2367785ff735f634c1d1e0c4e750d6051ee2a372 100644 (file)
 ;; SSE MUL, ADD, and MULADD.
 (define_insn_reservation "bdver1_ssemuladd_load_256" 11
                         (and (eq_attr "cpu" "bdver1,bdver2")
-                             (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+                             (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
                                   (and (eq_attr "mode" "V8SF,V4DF")
                                        (eq_attr "memory" "load"))))
                         "bdver1-double,bdver1-fpload,bdver1-ffma")
 (define_insn_reservation "bdver1_ssemuladd_256" 7
                         (and (eq_attr "cpu" "bdver1,bdver2")
-                             (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+                             (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
                                   (and (eq_attr "mode" "V8SF,V4DF")
                                        (eq_attr "memory" "none"))))
                         "bdver1-double,bdver1-fpsched,bdver1-ffma")
 (define_insn_reservation "bdver1_ssemuladd_load" 10
                         (and (eq_attr "cpu" "bdver1,bdver2")
-                             (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+                             (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
                                   (eq_attr "memory" "load")))
                         "bdver1-direct,bdver1-fpload,bdver1-ffma")
 (define_insn_reservation "bdver1_ssemuladd" 6
                         (and (eq_attr "cpu" "bdver1,bdver2")
-                             (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+                             (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
                                   (eq_attr "memory" "none")))
                         "bdver1-direct,bdver1-fpsched,bdver1-ffma")
 (define_insn_reservation "bdver1_sseimul_load" 8
index d154cdc077f2431736c58aeb5ca5fac1dbb6e050..5abc77b62a1fc23be8efb456843ec0a0781f44ec 100644 (file)
@@ -36,7 +36,7 @@
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "float")
         (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
-                         sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+                         sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
                          ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
                    (const_string "float")
 (define_insn_reservation "c2_sse_addcmp" 3
                         (and (eq_attr "cpu" "core2,corei7")
                              (and (eq_attr "memory" "none")
-                                  (eq_attr "type" "sseadd,ssecmp,ssecomi")))
+                                  (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
                         "c2_decodern,c2_p1")
 
 (define_insn_reservation "c2_sse_addcmp_load" 3
                         (and (eq_attr "cpu" "core2,corei7")
                              (and (eq_attr "memory" "load")
-                                  (eq_attr "type" "sseadd,ssecmp,ssecomi")))
+                                  (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
                         "c2_decodern,c2_p2+c2_p1")
 
 (define_insn_reservation "c2_sse_mul_SF" 4
index 5886478e3fff0fe6fa0ae2e18fec41ad759f333f..fa10cb4a42760e7257448cb743f1512180e6fee2 100644 (file)
    str,bitmanip,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
    sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
-   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
-   ssemuladd,sse4arg,lwp,
+   sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
+   ssediv,sseins,ssemuladd,sse4arg,lwp,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "i387")
         (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
-                         sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+                         sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
                          ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (const_string "sse")
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
                   imov,imovx,icmp,test,bitmanip,
                   fmov,fcmp,fsgn,
                   sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
-                  sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
+                  sseadd1,sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
              (match_operand 2 "memory_operand"))
           (const_string "load")
         (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
index bc1cb59d1c61f2ced0e873a8bab11544b0ce4c8e..f82b694beaddf60fdcda9d59528f3bc40eb399ec 100644 (file)
                         (and (eq_attr "cpu" "pentiumpro")
                              (and (eq_attr "memory" "none")
                                   (and (eq_attr "mode" "SF")
-                                       (eq_attr "type" "sseadd"))))
+                                       (eq_attr "type" "sseadd,sseadd1"))))
                         "decodern,p1")
 
 (define_insn_reservation "ppro_sse_add_SF_load" 3
                         (and (eq_attr "cpu" "pentiumpro")
                              (and (eq_attr "memory" "load")
                                   (and (eq_attr "mode" "SF")
-                                       (eq_attr "type" "sseadd"))))
+                                       (eq_attr "type" "sseadd,sseadd1"))))
                         "decoder0,p2+p1")
 
 (define_insn_reservation "ppro_sse_cmp_SF" 3
                         (and (eq_attr "cpu" "pentiumpro")
                              (and (eq_attr "memory" "none")
                                   (and (eq_attr "mode" "V4SF")
-                                       (eq_attr "type" "sseadd"))))
+                                       (eq_attr "type" "sseadd,sseadd1"))))
                         "decoder0,p1*2")
 
 (define_insn_reservation "ppro_sse_add_V4SF_load" 3
                         (and (eq_attr "cpu" "pentiumpro")
                              (and (eq_attr "memory" "load")
                                   (and (eq_attr "mode" "V4SF")
-                                       (eq_attr "type" "sseadd"))))
+                                       (eq_attr "type" "sseadd,sseadd1"))))
                         "decoder0,(p2+p1)*2")
 
 (define_insn_reservation "ppro_sse_cmp_V4SF" 3
index d7fadd0df1f343f2944a10feba653ec3b11908f8..a73c815eb56964093d66c510a57a028d9f2628e2 100644 (file)
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
-(define_insn "sse3_h<plusminus_insn>v2df3"
+(define_expand "sse3_haddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand")
+       (vec_concat:V2DF
+         (plus:DF
+           (vec_select:DF
+             (match_operand:V2DF 1 "register_operand")
+             (parallel [(const_int 0)]))
+           (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+         (plus:DF
+           (vec_select:DF
+             (match_operand:V2DF 2 "nonimmediate_operand")
+             (parallel [(const_int 0)]))
+           (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_SSE3")
+
+(define_insn "*sse3_haddv2df3"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+       (vec_concat:V2DF
+         (plus:DF
+           (vec_select:DF
+             (match_operand:V2DF 1 "register_operand" "0,x")
+             (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+           (vec_select:DF
+             (match_dup 1)
+             (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
+         (plus:DF
+           (vec_select:DF
+             (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
+             (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
+           (vec_select:DF
+             (match_dup 2)
+             (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
+  "TARGET_SSE3
+   && INTVAL (operands[3]) != INTVAL (operands[4])
+   && INTVAL (operands[5]) != INTVAL (operands[6])"
+  "@
+   haddpd\t{%2, %0|%0, %2}
+   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_hsubv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_concat:V2DF
-         (plusminus:DF
+         (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0,x")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
-         (plusminus:DF
+         (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_SSE3"
   "@
-   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
-   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+   hsubpd\t{%2, %0|%0, %2}
+   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V2DF")])
 
+(define_insn "*sse3_haddv2df3_low"
+  [(set (match_operand:DF 0 "register_operand" "=x,x")
+       (plus:DF
+         (vec_select:DF
+           (match_operand:V2DF 1 "register_operand" "0,x")
+           (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
+         (vec_select:DF
+           (match_dup 1)
+           (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
+  "TARGET_SSE3
+   && INTVAL (operands[2]) != INTVAL (operands[3])"
+  "@
+   haddpd\t{%0, %0|%0, %0}
+   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "*sse3_hsubv2df3_low"
+  [(set (match_operand:DF 0 "register_operand" "=x,x")
+       (minus:DF
+         (vec_select:DF
+           (match_operand:V2DF 1 "register_operand" "0,x")
+           (parallel [(const_int 0)]))
+         (vec_select:DF
+           (match_dup 1)
+           (parallel [(const_int 1)]))))]
+  "TARGET_SSE3"
+  "@
+   hsubpd\t{%0, %0|%0, %0}
+   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
 (define_insn "avx_h<plusminus_insn>v8sf3"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_concat:V8SF
index a2f300d8a85a1fa7ebbc97be99aa7656dc00d37b..08e0a6902834b03f1bb1cdc024312f5a8958567c 100644 (file)
@@ -1,3 +1,8 @@
+2012-10-08  Marc Glisse  <marc.glisse@inria.fr>
+
+       PR target/54400
+       * gcc.target/i386/pr54400.c: New testcase.
+
 2012-10-08  Jakub Jelinek  <jakub@redhat.com>
 
        PR c++/54858
diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c b/gcc/testsuite/gcc.target/i386/pr54400.c
new file mode 100644 (file)
index 0000000..5ed5ba0
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+double f (__m128d p)
+{
+  return p[0] - p[1];
+}
+
+double g1 (__m128d p)
+{
+  return p[0] + p[1];
+}
+
+double g2 (__m128d p)
+{
+  return p[1] + p[0];
+}
+
+__m128d h (__m128d p, __m128d q)
+{
+  __m128d r = { p[0] - p[1], q[0] - q[1] };
+  return r;
+}
+
+__m128d i1 (__m128d p, __m128d q)
+{
+  __m128d r = { p[0] + p[1], q[0] + q[1] };
+  return r;
+}
+
+__m128d i2 (__m128d p, __m128d q)
+{
+  __m128d r = { p[0] + p[1], q[1] + q[0] };
+  return r;
+}
+
+__m128d i3 (__m128d p, __m128d q)
+{
+  __m128d r = { p[1] + p[0], q[0] + q[1] };
+  return r;
+}
+
+__m128d i4 (__m128d p, __m128d q)
+{
+  __m128d r = { p[1] + p[0], q[1] + q[0] };
+  return r;
+}
+
+/* { dg-final { scan-assembler-times "hsubpd" 2 } } */
+/* { dg-final { scan-assembler-times "haddpd" 6 } } */
+/* { dg-final { scan-assembler-not "unpck" } } */