From 8b974f54393ab2d2d16a0051a68c155455a92aad Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 8 Jan 2024 15:13:41 +0800 Subject: [PATCH] Extend usdot_prodv*qi with vpmaddwd when AVXVNNI/AVX512VNNI is not available. gcc/ChangeLog: * config/i386/sse.md (usdot_prodv*qi): Extend to VI1_AVX512 with vpmaddwd when avxvnni/avx512vnni is not available. --- gcc/config/i386/sse.md | 55 +++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1bf50726e830..f57f36ae380e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -29955,21 +29955,48 @@ (define_expand "usdot_prod" [(match_operand: 0 "register_operand") - (match_operand:VI1_AVX512VNNI 1 "register_operand") - (match_operand:VI1_AVX512VNNI 2 "register_operand") + (match_operand:VI1_AVX512 1 "register_operand") + (match_operand:VI1_AVX512 2 "register_operand") (match_operand: 3 "register_operand")] - "(( == 64 && TARGET_EVEX512) - || ((TARGET_AVX512VNNI && TARGET_AVX512VL) - || TARGET_AVXVNNI))" -{ - operands[1] = lowpart_subreg (mode, - force_reg (mode, operands[1]), - mode); - operands[2] = lowpart_subreg (mode, - force_reg (mode, operands[2]), - mode); - emit_insn (gen_vpdpbusd_ (operands[0], operands[3], - operands[1], operands[2])); + "TARGET_SSE2" +{ + if ( == 64 + ? TARGET_AVX512VNNI + : ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI)) + { + operands[1] = lowpart_subreg (mode, + force_reg (mode, operands[1]), + mode); + operands[2] = lowpart_subreg (mode, + force_reg (mode, operands[2]), + mode); + emit_insn (gen_vpdpbusd_ (operands[0], operands[3], + operands[1], operands[2])); + } + else + { + /* Emulate with vpdpwssd. */ + rtx op1_lo = gen_reg_rtx (mode); + rtx op1_hi = gen_reg_rtx (mode); + rtx op2_lo = gen_reg_rtx (mode); + rtx op2_hi = gen_reg_rtx (mode); + + emit_insn (gen_vec_unpacku_lo_ (op1_lo, operands[1])); + emit_insn (gen_vec_unpacks_lo_ (op2_lo, operands[2])); + emit_insn (gen_vec_unpacku_hi_ (op1_hi, operands[1])); + emit_insn (gen_vec_unpacks_hi_ (op2_hi, operands[2])); + + rtx res1 = gen_reg_rtx (mode); + rtx res2 = gen_reg_rtx (mode); + rtx sum = gen_reg_rtx (mode); + + emit_move_insn (sum, CONST0_RTX (mode)); + emit_insn (gen_sdot_prod (res1, op1_lo, + op2_lo, sum)); + emit_insn (gen_sdot_prod (res2, op1_hi, + op2_hi, operands[3])); + emit_insn (gen_add3 (operands[0], res1, res2)); + } DONE; }) -- 2.47.2