]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Add mult-high and shift patterns for 4-byte vectors [PR100637]
authorUros Bizjak <ubizjak@gmail.com>
Thu, 20 May 2021 09:11:21 +0000 (11:11 +0200)
committerUros Bizjak <ubizjak@gmail.com>
Thu, 20 May 2021 09:12:31 +0000 (11:12 +0200)
2021-05-20  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
PR target/100637
* config/i386/mmx.md (Yv_Yw): Revert adding V4QI and V2HI modes.
(*<plusminus:insn><VI_32:mode>3): Use Yw instead of <Yv_Yw> constraint.
(<s>mulv4hi3_highpart): New expander.
(*<s>mulv2hi3_highpart): New insn pattern.
(<s>mulv2hi3_highpart): New expander.
(*<any_shift:insn>v2hi3): New insn pattern.
(<any_shift:insn>v2hi3): New expander.
* config/i386/sse.md (smulhrsv2hi3): New expander.
(*smulhrsv2hi3): New insn pattern.

gcc/testsuite/

PR target/100637
* gcc.target/i386/pr100637-1w.c (shl, ashr, lshr): New tests.

gcc/config/i386/mmx.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/pr100637-1w.c

index d8479782e90c79dc10ba1b6583960ca64c087531..948ba479c32d497f22494693a4baab20cca44854 100644 (file)
@@ -78,8 +78,7 @@
   [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")])
 
 (define_mode_attr Yv_Yw
-  [(V8QI "Yw") (V4QI "Yw") (V4HI "Yw") (V2HI "Yw")
-   (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
+  [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*<insn><mode>3"
-  [(set (match_operand:VI_32 0 "register_operand" "=x,<Yv_Yw>")
+  [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
         (plusminus:VI_32
-         (match_operand:VI_32 1 "register_operand" "<comm>0,<Yv_Yw>")
-         (match_operand:VI_32 2 "register_operand" "x,<Yv_Yw>")))]
+         (match_operand:VI_32 1 "register_operand" "<comm>0,Yw")
+         (match_operand:VI_32 2 "register_operand" "x,Yw")))]
   "TARGET_SSE2
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
    (set_attr "type" "mmxmul,ssemul,ssemul")
    (set_attr "mode" "DI,TI,TI")])
 
+;; Expander for the high-part multiply of two V4HI vectors.  The RTL
+;; extends both inputs to V4SI (any_extend, so one definition per
+;; extension code), multiplies them, shifts the products right by 16
+;; and truncates the result back to V4HI.  Enabled only under
+;; TARGET_MMX_WITH_SSE.  The preparation statement hands the operands
+;; to ix86_fixup_binary_operands_no_copy for a MULT in V4HImode.
+(define_expand "<s>mulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand")
+       (truncate:V4HI
+         (lshiftrt:V4SI
+           (mult:V4SI
+             (any_extend:V4SI
+               (match_operand:V4HI 1 "register_operand"))
+             (any_extend:V4SI
+               (match_operand:V4HI 2 "register_operand")))
+           (const_int 16))))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+;; Insn pattern for the high-part multiply of two V2HI (4-byte) vectors:
+;; extend both inputs to V2SI, multiply, shift right by 16, truncate to
+;; V2HI.  Requires TARGET_SSE2 and operands accepted by
+;; ix86_binary_operator_ok for MULT.
+;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
+;; the two-operand pmulh<u>w form; alternative 1 (isa avx) emits the
+;; three-operand vpmulh<u>w form.  The '%' on operand 1 marks operands
+;; 1 and 2 as commutative.
+(define_insn "*<s>mulv2hi3_highpart"
+  [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
+       (truncate:V2HI
+         (lshiftrt:V2SI
+           (mult:V2SI
+             (any_extend:V2SI
+               (match_operand:V2HI 1 "register_operand" "%0,Yw"))
+             (any_extend:V2SI
+               (match_operand:V2HI 2 "register_operand" "x,Yw")))
+           (const_int 16))))]
+  "TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, V2HImode, operands)"
+  "@
+   pmulh<u>w\t{%2, %0|%0, %2}
+   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssemul")
+   (set_attr "mode" "TI")])
+
+;; Expander for the high-part multiply of two V2HI vectors.  Generates
+;; the extend / multiply / shift-right-by-16 / truncate RTL on V2SI
+;; intermediates.  Enabled under TARGET_SSE2.  The preparation statement
+;; hands the operands to ix86_fixup_binary_operands_no_copy for a MULT
+;; in V2HImode.
+(define_expand "<s>mulv2hi3_highpart"
+  [(set (match_operand:V2HI 0 "register_operand")
+       (truncate:V2HI
+         (lshiftrt:V2SI
+           (mult:V2SI
+             (any_extend:V2SI
+               (match_operand:V2HI 1 "register_operand"))
+             (any_extend:V2SI
+               (match_operand:V2HI 2 "register_operand")))
+           (const_int 16))))]
+  "TARGET_SSE2"
+  "ix86_fixup_binary_operands_no_copy (MULT, V2HImode, operands);")
+
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
         (plus:V2SI
          (match_operand:DI 2 "nonmemory_operand")))]
   "TARGET_MMX_WITH_SSE")
 
+;; Insn pattern for shifting a V2HI vector (one definition per any_shift
+;; code).  Operand 1 is the vector being shifted; operand 2 is the DImode
+;; shift count, accepted either in a register or as an "N" constant.
+;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
+;; the two-operand p<vshift>w form; alternative 1 (isa avx) emits the
+;; three-operand vp<vshift>w form.  Requires TARGET_SSE2.
+;; length_immediate is 1 when operand 2 is a const_int and 0 otherwise.
+(define_insn "*<insn>v2hi3"
+  [(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
+        (any_shift:V2HI
+         (match_operand:V2HI 1 "register_operand" "0,Yw")
+         (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
+  "TARGET_SSE2"
+  "@
+   p<vshift>w\t{%2, %0|%0, %2}
+   vp<vshift>w\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft")
+   (set (attr "length_immediate")
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "mode" "TI")])
+
+;; Named expander for V2HI vector shifts (one per any_shift code).
+;; Operand 2 is a DImode nonmemory operand holding the shift count.
+;; There are no preparation statements: the RTL template is emitted
+;; as written.  Enabled under TARGET_SSE2.
+(define_expand "<insn>v2hi3"
+  [(set (match_operand:V2HI 0 "register_operand")
+        (any_shift:V2HI
+         (match_operand:V2HI 1 "register_operand")
+         (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_SSE2")
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral comparisons
index a4503ddcb73473484bccabcb3ffca36765057d68..0f1108f0db10baf82b9764ee034707539e6abb34 100644 (file)
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI,TI,TI")])
 
+;; Expander for the V2HI signed multiply-high with rounding and scaling.
+;; The RTL sign-extends both inputs to V2SI, multiplies them, shifts the
+;; products right by 14, adds operand 3, shifts right by 1 and truncates
+;; the result to V2HI.  Enabled under TARGET_SSSE3.
+;; Operand 3 is not supplied by the caller: the preparation code sets it
+;; to CONST1_RTX (V2HImode) before fixing up the binary operands for MULT.
+(define_expand "smulhrsv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand")
+       (truncate:V2HI
+         (lshiftrt:V2SI
+           (plus:V2SI
+             (lshiftrt:V2SI
+               (mult:V2SI
+                 (sign_extend:V2SI
+                   (match_operand:V2HI 1 "register_operand"))
+                 (sign_extend:V2SI
+                   (match_operand:V2HI 2 "register_operand")))
+               (const_int 14))
+             (match_dup 3))
+           (const_int 1))))]
+  "TARGET_SSSE3"
+{
+  operands[3] = CONST1_RTX(V2HImode);
+  ix86_fixup_binary_operands_no_copy (MULT, V2HImode, operands);
+})
+
+;; Insn pattern for the V2HI signed multiply-high with rounding and
+;; scaling: sign-extend both inputs to V2SI, multiply, shift right by 14,
+;; add operand 3 (which must satisfy const1_operand), shift right by 1,
+;; truncate to V2HI.  Requires TARGET_SSSE3.
+;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
+;; pmulhrsw; alternative 1 (isa avx) emits the three-operand vpmulhrsw.
+;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
+;; NOTE(review): operands 1 and 2 are both register_operand, so the
+;; MEM_P check in the condition looks redundant -- confirm.
+;; NOTE(review): the V2HI patterns in mmx.md use Yw for their AVX
+;; alternative while this one uses Yv -- confirm that is intended.
+(define_insn "*smulhrsv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand" "=x,Yv")
+       (truncate:V2HI
+         (lshiftrt:V2SI
+           (plus:V2SI
+             (lshiftrt:V2SI
+               (mult:V2SI
+                 (sign_extend:V2SI
+                   (match_operand:V2HI 1 "register_operand" "%0,Yv"))
+                 (sign_extend:V2SI
+                   (match_operand:V2HI 2 "register_operand" "x,Yv")))
+               (const_int 14))
+             (match_operand:V2HI 3 "const1_operand"))
+           (const_int 1))))]
+  "TARGET_SSSE3
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseimul")
+   (set_attr "prefix_extra" "1")
+   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+   (set_attr "mode" "TI")])
+
 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
        (unspec:VI1_AVX512
index ed1baeb3acde7cdb76f81245596045cb088e2369..fe6964044b6f7505353fb41f4df68059fb912efc 100644 (file)
@@ -3,6 +3,7 @@
 /* { dg-options "-O2 -msse2 -dp" } */
 
 typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
 
 __v2hi and (__v2hi a, __v2hi b) { return a & b; };
 /* { dg-final { scan-assembler "andv2hi3" } } */
@@ -26,3 +27,12 @@ __v2hi neg  (__v2hi a) { return -a; };
 
 __v2hi mul  (__v2hi a, __v2hi b) { return a * b; };
 /* { dg-final { scan-assembler "mulv2hi3" } } */
+
+__v2hi shl (__v2hi a, int b) { return a << b; };
+/* { dg-final { scan-assembler "ashlv2hi3" } } */
+
+__v2hi ashr (__v2hi a, int b) { return a >> b; };
+/* { dg-final { scan-assembler "ashrv2hi3" } } */
+
+__v2hu lshr  (__v2hu a, int b) { return a >> b; };
+/* { dg-final { scan-assembler "lshrv2hi3" } } */