]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Optimize a < 0 ? -1 : 0 to (signed)a >> 31.
authorliuhongt <hongtao.liu@intel.com>
Thu, 20 Jun 2024 04:41:13 +0000 (12:41 +0800)
committerliuhongt <hongtao.liu@intel.com>
Mon, 1 Jul 2024 05:20:09 +0000 (13:20 +0800)
Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
and x < 0 ? 1 : 0 into (unsigned) x >> 31.

Add define_insn_and_split patterns for the optimization done in
ix86_expand_int_vcond.

gcc/ChangeLog:

PR target/115517
* config/i386/sse.md ("*ashr<mode>3_1"): New
define_insn_and_split.
(*avx512_ashr<mode>3_1): Ditto.
(*avx2_lshr<mode>3_1): Ditto.
(*avx2_lshr<mode>3_2): Ditto and add 2 combine splitter after
it.
* config/i386/mmx.md (mmxscalarsize): New mode attribute.
(*mmx_ashr<mode>3_1): New define_insn_and_split.
(mmx_<insn><mode>3): Add a combine splitter after it.
(*mmx_ashrv2hi3_1): New define_insn_and_split, also add a
combine splitter after it.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr111023-2.c: Adjust testcase.
* gcc.target/i386/vect-div-1.c: Ditto.

gcc/config/i386/mmx.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/pr111023-2.c
gcc/testsuite/gcc.target/i386/vect-div-1.c

index 359dc90628d67ddd82329f7e8d5589007605e76c..fca28df99a151dca7b06f720515dc6131150ee54 100644 (file)
    (V4HI "hi") (V2HI "hi")
    (V8QI "qi")])
 
+;; Maps each listed vector mode to the width, in bits, of one of its
+;; elements, as a decimal string (e.g. V2SI -> "32").  The patterns in
+;; this change use <mmxscalarsize> - 1 as a shift count.
+(define_mode_attr mmxscalarsize
+  [(V1DI "64")
+   (V2SI "32") (V2SF "32")
+   (V4HF "16") (V4BF "16")
+   (V2HF "16") (V2BF "16")
+   (V4HI "16") (V2HI "16")
+   (V8QI "8")])
+
 (define_mode_attr Yv_Yw
   [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
 
        (const_string "0")))
    (set_attr "mode" "DI,TI,TI")])
 
+;; Recognize an element-wise signed "x < 0" compare on an MMXMODE24
+;; vector and rewrite it as an arithmetic right shift of x by
+;; <mmxscalarsize> - 1 (the "x < 0 ? -1 : 0 -> (signed) x >> 31"
+;; transformation, generalized over the element width).
+;; The "#" output template together with the "&& 1" split condition means
+;; the insn is never emitted as-is: whenever it matches it is split.
+;; Matching is restricted to TARGET_MMX_WITH_SSE while
+;; ix86_pre_reload_split () holds.
+;; operands[3] is created by the preparation statement: the shift count
+;; as a DImode constant.
+(define_insn_and_split "*mmx_ashr<mode>3_1"
+  [(set (match_operand:MMXMODE24 0 "register_operand")
+       (lt:MMXMODE24
+         (match_operand:MMXMODE24 1 "register_operand")
+         (match_operand:MMXMODE24 2 "const0_operand")))]
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:MMXMODE24 (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
 (define_expand "ashr<mode>3"
   [(set (match_operand:MMXMODE24 0 "register_operand")
         (ashiftrt:MMXMODE24
        (const_string "0")))
    (set_attr "mode" "DI,TI,TI")])
 
+;; Combine splitter: (x < 0) & 1 on an MMXMODE248 vector becomes a
+;; logical right shift of x by <mmxscalarsize> - 1 (the
+;; "x < 0 ? 1 : 0 -> (unsigned) x >> 31" transformation, generalized
+;; over the element width).
+;; operand 2 must be the zero vector and operand 3 must satisfy
+;; const1_operand; neither appears in the replacement.
+;; operands[4] is the DImode shift count built by the preparation
+;; statement.
+(define_split
+  [(set (match_operand:MMXMODE248 0 "register_operand")
+       (and:MMXMODE248
+         (lt:MMXMODE248
+           (match_operand:MMXMODE248 1 "register_operand")
+           (match_operand:MMXMODE248 2 "const0_operand"))
+         (match_operand:MMXMODE248 3 "const1_operand")))]
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:MMXMODE248 (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (<mmxscalarsize> - 1, DImode);")
+
 (define_expand "<insn><mode>3"
   [(set (match_operand:MMXMODE24 0 "register_operand")
         (any_lshift:MMXMODE24
        (const_string "0")))
    (set_attr "mode" "TI")])
 
+;; V2HI variant of the "x < 0 -> arithmetic shift" rewrite: an
+;; element-wise signed compare of a V2HI register against zero is split
+;; into an arithmetic right shift by 15 (16-bit elements, so 16 - 1).
+;; "#" plus "&& 1" means the insn is always split rather than output.
+;; Guarded by TARGET_SSE2 and ix86_pre_reload_split ().
+;; operands[3] is the DImode shift count created for the replacement.
+(define_insn_and_split "*mmx_ashrv2hi3_1"
+  [(set (match_operand:V2HI 0 "register_operand")
+       (lt:V2HI
+         (match_operand:V2HI 1 "register_operand")
+         (match_operand:V2HI 2 "const0_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:V2HI (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (15, DImode);")
+
+;; Combine splitter for V2HI: (x < 0) & 1 becomes a logical right shift
+;; of x by 15, leaving only the former sign bit in each 16-bit element.
+;; operands 2 (zero) and 3 (const1_operand) only constrain the match;
+;; operands[4] is the DImode shift count built for the replacement.
+(define_split
+  [(set (match_operand:V2HI 0 "register_operand")
+       (and:V2HI
+         (lt:V2HI
+           (match_operand:V2HI 1 "register_operand")
+           (match_operand:V2HI 2 "const0_operand"))
+         (match_operand:V2HI 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:V2HI (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (15, DImode);")
+
 (define_expand "<insn>v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
        (any_shift:V8QI (match_operand:V8QI 1 "register_operand")
index 3ffa1881c83bb672544c2c6b5d51bd744b7b6671..1169e93453e1b4ae4a1ae7d4e884ceb7c8f89c75 100644 (file)
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Recognize an element-wise signed "x < 0" compare on a VI24_AVX2
+;; vector and split it into an arithmetic right shift of x by
+;; <ssescalarsize> - 1 (x < 0 ? -1 : 0 -> (signed) x >> 31 for 32-bit
+;; elements).
+;; "#" plus "&& 1" means the insn is always split rather than output.
+;; Guarded by TARGET_SSE2 and ix86_pre_reload_split ().
+;; operands[3] is the DImode shift count created by the preparation
+;; statement.
+(define_insn_and_split "*ashr<mode>3_1"
+  [(set (match_operand:VI24_AVX2 0 "register_operand")
+       (lt:VI24_AVX2
+         (match_operand:VI24_AVX2 1 "register_operand")
+         (match_operand:VI24_AVX2 2 "const0_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (ashiftrt:VI24_AVX2 (match_dup 1) (match_dup 3)))]
+  "operands[3] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
   [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
        (ashiftrt:VI248_AVX512BW_AVX512VL
        (const_string "0")))
    (set_attr "mode" "<sseinsnmode>")])
 
+;; AVX-512 mask form of the same rewrite: a vec_merge that selects
+;; between an all-ones vector (operand 1) and a zero vector (operand 2)
+;; under a mask produced by UNSPEC_PCMP of operand 3 against zero is
+;; split into an arithmetic right shift of operand 3 by
+;; <ssescalarsize> - 1.
+;; NOTE(review): the (const_int 1) in the unspec is presumably the
+;; "less than" comparison predicate -- confirm against the UNSPEC_PCMP
+;; users elsewhere in sse.md.
+;; NOTE(review): operand 3 is matched as nonimmediate_operand but is
+;; reused directly as the shift source -- confirm the shift patterns
+;; accept a memory source here.
+;; "#" plus "&& 1" means the insn is always split rather than output.
+;; operands[5] is the DImode shift count created by the preparation
+;; statement.
+(define_insn_and_split "*avx512_ashr<mode>3_1"
+ [(set (match_operand:VI248_AVX512VLBW  0 "register_operand")
+       (vec_merge:VI248_AVX512VLBW
+         (match_operand:VI248_AVX512VLBW 1 "vector_all_ones_operand")
+         (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+         (unspec:<avx512fmaskmode>
+           [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+            (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+            (const_int 1)]
+            UNSPEC_PCMP)))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (ashiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+  "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_expand "ashr<mode>3"
   [(set (match_operand:VI248_AVX512BW 0 "register_operand")
        (ashiftrt:VI248_AVX512BW
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; (a > b) & 1 on a VI8_AVX2 vector, with both compare inputs in
+;; registers.  Split into two insns:
+;;   1. the element-wise greater-than compare, written to a fresh pseudo
+;;      (operands[5], allocated with gen_reg_rtx in the preparation code);
+;;   2. a logical right shift of that pseudo by <ssescalarsize> - 1
+;;      (operands[4]), which replaces the AND with 1.
+;; operand 3 (const1_operand) only constrains the match and is dropped.
+;; "#" plus "&& 1" means the insn is always split rather than output.
+;; Guarded by TARGET_SSE4_2 and ix86_pre_reload_split (); a new pseudo is
+;; created, so the split presumably must happen before register
+;; allocation.
+(define_insn_and_split "*avx2_lshr<mode>3_1"
+  [(set (match_operand:VI8_AVX2 0 "register_operand")
+       (and:VI8_AVX2
+         (gt:VI8_AVX2
+           (match_operand:VI8_AVX2 1 "register_operand")
+           (match_operand:VI8_AVX2 2 "register_operand"))
+         (match_operand:VI8_AVX2 3 "const1_operand")))]
+  "TARGET_SSE4_2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 5) (gt:VI8_AVX2 (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 5) (match_dup 4)))]
+{
+  operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);
+  operands[5] = gen_reg_rtx (<MODE>mode);
+})
+
+;; (x < 0) & 1 on a VI8_AVX2 vector is split into a single logical right
+;; shift of x by the literal count 63.  No compare is emitted.
+;; Unlike the neighbouring patterns the shift count is written directly
+;; in the replacement template, so there is no preparation statement.
+;; operands 2 (zero) and 3 (const1_operand) only constrain the match.
+;; "#" plus "&& 1" means the insn is always split rather than output.
+;; Guarded by TARGET_SSE2 and ix86_pre_reload_split ().
+(define_insn_and_split "*avx2_lshr<mode>3_2"
+  [(set (match_operand:VI8_AVX2 0 "register_operand")
+       (and:VI8_AVX2
+         (lt:VI8_AVX2
+           (match_operand:VI8_AVX2 1 "register_operand")
+           (match_operand:VI8_AVX2 2 "const0_operand"))
+         (match_operand:VI8_AVX2 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (lshiftrt:VI8_AVX2 (match_dup 1) (const_int 63)))])
+
+;; Combine splitter: (x < 0) & 1 on a VI248_AVX2 vector becomes a
+;; logical right shift of x by <ssescalarsize> - 1
+;; (x < 0 ? 1 : 0 -> (unsigned) x >> 31 for 32-bit elements).
+;; operands 2 (zero) and 3 (const1_operand) only constrain the match;
+;; operands[4] is the DImode shift count built for the replacement.
+(define_split
+  [(set (match_operand:VI248_AVX2 0 "register_operand")
+       (and:VI248_AVX2
+         (lt:VI248_AVX2
+           (match_operand:VI248_AVX2 1 "register_operand")
+           (match_operand:VI248_AVX2 2 "const0_operand"))
+         (match_operand:VI248_AVX2 3 "const1_operand")))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0) (lshiftrt:VI248_AVX2 (match_dup 1) (match_dup 4)))]
+  "operands[4] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
+;; Combine splitter, AVX-512 mask form: a vec_merge that selects between
+;; a const1_operand vector (operand 1) and a zero vector (operand 2)
+;; under a mask produced by UNSPEC_PCMP of operand 3 against zero is
+;; replaced by a logical right shift of operand 3 by
+;; <ssescalarsize> - 1.
+;; NOTE(review): the (const_int 1) in the unspec is presumably the
+;; "less than" comparison predicate -- confirm against the UNSPEC_PCMP
+;; users elsewhere in sse.md.
+;; operands[5] is the DImode shift count built for the replacement.
+(define_split
+ [(set (match_operand:VI248_AVX512VLBW  0 "register_operand")
+       (vec_merge:VI248_AVX512VLBW
+         (match_operand:VI248_AVX512VLBW 1 "const1_operand")
+         (match_operand:VI248_AVX512VLBW 2 "const0_operand")
+         (unspec:<avx512fmaskmode>
+           [(match_operand:VI248_AVX512VLBW 3 "nonimmediate_operand")
+            (match_operand:VI248_AVX512VLBW 4 "const0_operand")
+            (const_int 1)]
+            UNSPEC_PCMP)))]
+  "TARGET_AVX512F && ix86_pre_reload_split ()"
+  [(set (match_dup 0)
+       (lshiftrt:VI248_AVX512VLBW (match_dup 3) (match_dup 5)))]
+  "operands[5] = gen_int_mode (<ssescalarsize> - 1, DImode);")
+
 (define_insn "<insn><mode>3<mask_name>"
   [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
        (any_lshift:VI248_AVX512BW
index 6c69f9475442b662e21ff1da5657af4a6406f403..ba52959b3575d834c75f30729d79dd236e707010 100644 (file)
@@ -36,7 +36,7 @@ v4si_v4hi (v4si *dst, v8hi src)
   dst[0] = *(v4si *) tem;
 }
 
-/* { dg-final { scan-assembler "pcmpgtw" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtw|psraw)" } } */
 /* { dg-final { scan-assembler "punpcklwd" } } */
 
 void
@@ -48,5 +48,5 @@ v2di_v2si (v2di *dst, v4si src)
   dst[0] = *(v2di *) tem;
 }
 
-/* { dg-final { scan-assembler "pcmpgtd" } } */
+/* { dg-final { scan-assembler "(?:pcmpgtd|psrad)" } } */
 /* { dg-final { scan-assembler "punpckldq" } } */
index f611088d8dfe4eda59c0e8e41a8de6cb52644dc1..2d92b9cc2f1570e5fa3c41107d64250f7ed9d39e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-common -msse2" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-common -msse4.1" } */
 
 unsigned short b[1024] = { 0 };
 int a[1024] = { 0 };