Improved V1TI (and V2DI) mode equality/inequality on x86_64.

author Roger Sayle <roger@nextmovesoftware.com>

Fri, 13 May 2022 21:26:29 +0000 (22:26 +0100)

committer Roger Sayle <roger@nextmovesoftware.com>

Fri, 13 May 2022 21:30:47 +0000 (22:30 +0100)
author Roger Sayle <roger@nextmovesoftware.com>
Fri, 13 May 2022 21:26:29 +0000 (22:26 +0100)
committer Roger Sayle <roger@nextmovesoftware.com>
Fri, 13 May 2022 21:30:47 +0000 (22:30 +0100)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index 6578104fee26eb087302c41ad2a0cf8d22c34b4c..88fc5216a589cd2d954d53a9371f14558e5c4c20 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4390,13 +4390,73 @@
         (match_operator:V2DI 1 ""
           [(match_operand:V2DI 2 "register_operand")
            (match_operand:V2DI 3 "vector_operand")]))]
-  "TARGET_SSE4_1"
+  "TARGET_SSE2"
 {
-  bool ok = ix86_expand_int_vec_cmp (operands);
+  bool ok;
+  if (!TARGET_SSE4_1)
+    {
+      /* Pre-SSE4.1 path: do the comparison on the V4SI view of the
+         operands, then merge the two 32-bit halves of each V2DI lane.  */
+      rtx ops[4];
+      ops[0] = gen_reg_rtx (V4SImode);
+      ops[2] = gen_lowpart (V4SImode, force_reg (V2DImode, operands[2]));
+      ops[3] = gen_lowpart (V4SImode, force_reg (V2DImode, operands[3]));
+      ops[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), V4SImode,
+                              ops[2], ops[3]);
+      ok = ix86_expand_int_vec_cmp (ops);
+
+      /* pshufd 0xb1 selects lanes {1,0,3,2}: swap adjacent 32-bit lanes.  */
+      rtx tmp1 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_sse2_pshufd (tmp1, ops[0], GEN_INT (0xb1)));
+
+      /* EQ holds only when both halves are equal (AND); NE holds when
+         either half differs (IOR).  */
+      rtx tmp2 = gen_reg_rtx (V4SImode);
+      if (GET_CODE (operands[1]) == EQ)
+        emit_insn (gen_andv4si3 (tmp2, tmp1, ops[0]));
+      else
+        emit_insn (gen_iorv4si3 (tmp2, tmp1, ops[0]));
+
+      emit_move_insn (operands[0], gen_lowpart (V2DImode, tmp2));
+    }
+  else
+    ok = ix86_expand_int_vec_cmp (operands);
    gcc_assert (ok);
    DONE;
  })
  
+(define_expand "vec_cmpeqv1tiv1ti"
+  [(set (match_operand:V1TI 0 "register_operand")
+       (match_operator:V1TI 1 ""
+         [(match_operand:V1TI 2 "register_operand")
+          (match_operand:V1TI 3 "vector_operand")]))]
+  "TARGET_SSE2"
+{
+  /* Compare the operands as V2DI first, then merge the results of the
+     two 64-bit halves into a single 128-bit mask.  */
+  rtx dst = gen_reg_rtx (V2DImode);
+  rtx op1 = gen_lowpart (V2DImode, force_reg (V1TImode, operands[2]));
+  rtx op2 = gen_lowpart (V2DImode, force_reg (V1TImode, operands[3]));
+  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), V2DImode, op1, op2);
+  emit_insn (gen_vec_cmpeqv2div2di (dst, cmp, op1, op2));
+
+  /* pshufd 0x4e selects lanes {2,3,0,1}: swap the two 64-bit halves.  */
+  rtx tmp1 = gen_reg_rtx (V4SImode);
+  rtx tmp2 = gen_lowpart (V4SImode, dst);
+  emit_insn (gen_sse2_pshufd (tmp1, tmp2, GEN_INT (0x4e)));
+
+  /* EQ holds only when both halves are equal (AND); NE holds when
+     either half differs (IOR).  */
+  rtx tmp3 = gen_reg_rtx (V4SImode);
+  if (GET_CODE (operands[1]) == EQ)
+    emit_insn (gen_andv4si3 (tmp3, tmp2, tmp1));
+  else
+    emit_insn (gen_iorv4si3 (tmp3, tmp2, tmp1));
+
+  emit_move_insn (operands[0], gen_lowpart (V1TImode, tmp3));
+  DONE;
+})
+
  (define_expand "vcond<V_512:mode><VF_512:mode>"
    [(set (match_operand:V_512 0 "register_operand")
         (if_then_else:V_512
diff --git a/gcc/testsuite/gcc.target/i386/sse2-v1ti-veq.c b/gcc/testsuite/gcc.target/i386/sse2-v1ti-veq.c

new file mode 100644 (file)

index 0000000..b3837c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-v1ti-veq.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2" } */
+typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));  /* 16-byte vector of unsigned __int128 */
+typedef unsigned long long uv2di __attribute__ ((__vector_size__ (16)));  /* 16-byte vector of unsigned long long */
+typedef unsigned int uv4si __attribute__ ((__vector_size__ (16)));  /* 16-byte vector of unsigned int */
+
+uv1ti eq_v1ti(uv1ti x, uv1ti y) { return x == y; }  /* element-wise == on uv1ti */
+uv2di eq_v2di(uv2di x, uv2di y) { return x == y; }  /* element-wise == on uv2di */
+uv4si eq_v4si(uv4si x, uv4si y) { return x == y; }  /* element-wise == on uv4si */
+
+/* { dg-final { scan-assembler-times "pcmpeq" 3 } } */
+/* { dg-final { scan-assembler "pshufd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c b/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c

new file mode 100644 (file)

index 0000000..767b0e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2" } */
+typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));  /* 16-byte vector of unsigned __int128 */
+typedef unsigned long long uv2di __attribute__ ((__vector_size__ (16)));  /* 16-byte vector of unsigned long long */
+typedef unsigned int uv4si __attribute__ ((__vector_size__ (16)));  /* 16-byte vector of unsigned int */
+
+uv1ti eq_v1ti(uv1ti x, uv1ti y) { return x != y; }  /* NOTE: named eq_* but tests != on uv1ti */
+uv2di eq_v2di(uv2di x, uv2di y) { return x != y; }  /* NOTE: named eq_* but tests != on uv2di */
+uv4si eq_v4si(uv4si x, uv4si y) { return x != y; }  /* NOTE: named eq_* but tests != on uv4si */
+
+/* { dg-final { scan-assembler-times "pcmpeq" 6 } } */
+/* { dg-final { scan-assembler-times "pxor" 3 } } */
+/* { dg-final { scan-assembler "pshufd" } } */
author	Roger Sayle <roger@nextmovesoftware.com>
	Fri, 13 May 2022 21:26:29 +0000 (22:26 +0100)
committer	Roger Sayle <roger@nextmovesoftware.com>
	Fri, 13 May 2022 21:30:47 +0000 (22:30 +0100)
gcc/config/i386/sse.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/i386/sse2-v1ti-veq.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/sse2-v1ti-vne.c	[new file with mode: 0644]	patch \| blob