From 919642fa4b2bc4c32910336dd200d53766801c80 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 5 May 2023 14:10:18 +0200 Subject: [PATCH] i386: Introduce mulv2si3 instruction For SSE2 targets the expander unpacks the input elements into the correct positions in a V4SI vector and emits the PMULUDQ instruction. The output elements are then shuffled back to their positions in the V2SI vector. For SSE4.1 targets the PMULLD instruction is emitted directly. gcc/ChangeLog: * config/i386/mmx.md (mulv2si3): New expander. (*mulv2si3): New insn pattern. gcc/testsuite/ChangeLog: * gcc.target/i386/sse2-mmx-mult-vec.c: New test. --- gcc/config/i386/mmx.md | 49 +++++++++++++++++++ .../gcc.target/i386/sse2-mmx-mult-vec.c | 27 ++++++++++ 2 files changed, 76 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 872ddbc55f22..6dd203f4fa87 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2092,6 +2092,55 @@ (set_attr "type" "sseadd") (set_attr "mode" "TI")]) +(define_expand "mulv2si3" + [(set (match_operand:V2SI 0 "register_operand") + (mult:V2SI + (match_operand:V2SI 1 "register_operand") + (match_operand:V2SI 2 "register_operand")))] + "TARGET_MMX_WITH_SSE" +{ + if (!TARGET_SSE4_1) + { + rtx op1 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[1]), + V2SImode); + rtx op2 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[2]), + V2SImode); + + rtx tmp1 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_interleave_lowv4si (tmp1, op1, op1)); + rtx tmp2 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_interleave_lowv4si (tmp2, op2, op2)); + + rtx res = gen_reg_rtx (V2DImode); + emit_insn (gen_vec_widen_umult_even_v4si (res, tmp1, tmp2)); + + rtx op0 = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_pshufd_1 (op0, gen_lowpart (V4SImode, res), + const0_rtx, const2_rtx, + const0_rtx, const2_rtx)); + + emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode)); + 
DONE; + } +}) + +(define_insn "*mulv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v") + (mult:V2SI + (match_operand:V2SI 1 "register_operand" "%0,0,v") + (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))] + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" + "@ + pmulld\t{%2, %0|%0, %2} + pmulld\t{%2, %0|%0, %2} + vpmulld\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseimul") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector") + (set_attr "mode" "TI")]) + (define_expand "mmx_mulv4hi3" [(set (match_operand:V4HI 0 "register_operand") (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand") diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c new file mode 100644 index 000000000000..cdc9a7bb8bf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ +/* { dg-require-effective-target sse2 } */ + +#include "sse2-check.h" + +#define N 2 + +int a[N] = {-287807, 604344}; +int b[N] = {474362, 874120}; +int r[N]; + +int rc[N] = {914249338, -11800128}; + +static void +sse2_test (void) +{ + int i; + + for (i = 0; i < N; i++) + r[i] = a[i] * b[i]; + + /* check results: */ + for (i = 0; i < N; i++) + if (r[i] != rc[i]) + abort (); +} -- 2.47.2