From: liuhongt Date: Tue, 28 Nov 2023 06:46:21 +0000 (+0800) Subject: Use vec_extract_lo instead of subreg in reduc__scal_m. X-Git-Tag: basepoints/gcc-15~4150 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48;p=thirdparty%2Fgcc.git Use vec_extract_lo instead of subreg in reduc__scal_m. The loop vectorizer will use vec_perm to select the lower part of a vector, so there could be some redundancy when using subreg in reduc__scal_m, because RTL CSE can't figure out that a vec_select of the lower part is just a subreg. I'm trying to canonicalize vec_select to subreg like aarch64 did, but there are so many regressions: some are easy to fix, some require middle-end adjustment. So for simplicity, the patch uses vec_select instead of subreg in reduc__scal_m. gcc/ChangeLog: * config/i386/sse.md: (reduc_plus_scal_): Use vec_extract_lo instead of subreg. (reduc__scal_): Ditto. (reduc__scal_): Ditto. (reduc__scal_): Ditto. (reduc__scal_): Ditto. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4f511693e3ff..5e0e0e9e51f9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3480,11 +3480,12 @@ "" { rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (mode); - rtx tmp3 = gen_lowpart (mode, operands[1]); - emit_insn (gen_add3 (tmp2, tmp, tmp3)); - emit_insn (gen_reduc_plus_scal_ (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (mode); + emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_ (tmp2, operands[1])); + emit_insn (gen_add3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_plus_scal_ (operands[0], tmp3)); DONE; }) @@ -3528,11 +3529,12 @@ "" { rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (mode); - emit_insn (gen_3 - (tmp2, tmp, gen_lowpart (mode, operands[1]))); - emit_insn (gen_reduc__scal_ (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (mode); + emit_insn 
(gen_vec_extract_hi_ (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_ (tmp2, operands[1])); + emit_insn (gen_3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc__scal_ (operands[0], tmp3)); DONE; }) @@ -3543,11 +3545,12 @@ "TARGET_AVX512F" { rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (mode); - emit_insn (gen_3 - (tmp2, tmp, gen_lowpart (mode, operands[1]))); - emit_insn (gen_reduc__scal_ (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (mode); + emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_ (tmp2, operands[1])); + emit_insn (gen_3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc__scal_ (operands[0], tmp3)); DONE; }) @@ -3558,14 +3561,15 @@ "TARGET_AVX2" { rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (mode); - emit_insn (gen_3 - (tmp2, tmp, gen_lowpart (mode, operands[1]))); rtx tmp3 = gen_reg_rtx (mode); - ix86_expand_reduc (gen_3, tmp3, tmp2); + emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_ (tmp2, operands[1])); + emit_insn (gen_3 (tmp3, tmp, tmp2)); + rtx tmp4 = gen_reg_rtx (mode); + ix86_expand_reduc (gen_3, tmp4, tmp3); emit_insn (gen_vec_extract - (operands[0], tmp3, const0_rtx)); + (operands[0], tmp4, const0_rtx)); DONE; }) @@ -3637,11 +3641,12 @@ "" { rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (mode); - rtx tmp3 = gen_lowpart (mode, operands[1]); - emit_insn (gen_3 (tmp2, tmp, tmp3)); - emit_insn (gen_reduc__scal_ (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (mode); + emit_insn (gen_vec_extract_hi_ (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_ (tmp2, operands[1])); + emit_insn (gen_3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc__scal_ (operands[0], tmp3)); DONE; })