From: Xi Ruoyao Date: Thu, 14 Dec 2023 17:49:40 +0000 (+0800) Subject: LoongArch: Implement FCCmode reload and cstore4 X-Git-Tag: basepoints/gcc-15~3325 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=78607d122976cbfd39b0b12e9be662c47c81fed0;p=thirdparty%2Fgcc.git LoongArch: Implement FCCmode reload and cstore4 We used a branch to load floating-point comparison results into a GPR. This is very slow when the branch is not predictable. Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM. Then implement cstore4. gcc/ChangeLog: * config/loongarch/loongarch-tune.h (loongarch_rtx_cost_data::movcf2gr): New field. (loongarch_rtx_cost_data::movgr2cf): New field. (loongarch_rtx_cost_data::movcf2gr_): New method. (loongarch_rtx_cost_data::movgr2cf_): New method. * config/loongarch/loongarch-def.cc (loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based on timing on LA464. (loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to COSTS_N_INSNS (1) for LA664. (loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to COSTS_N_INSNS (1) + 1. * config/loongarch/predicates.md (loongarch_fcmp_operator): New predicate. * config/loongarch/loongarch.md (movfcc): Change to define_expand. (movfcc_internal): New define_insn. (fcc_to_): New define_insn. (cstore4): New define_expand. * config/loongarch/loongarch.cc (loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs and FPRs. (loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR. (loongarch_emit_float_compare): Call gen_reg_rtx instead of loongarch_allocate_fcc. (loongarch_allocate_fcc): Remove. (loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS. (loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS. (loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS, FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS. gcc/testsuite/ChangeLog: * gcc.target/loongarch/movcf2gr.c: New test. * gcc.target/loongarch/movcf2gr-via-fr.c: New test. 
--- diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc index 4a8885e83438..843be78e46ed 100644 --- a/gcc/config/loongarch/loongarch-def.cc +++ b/gcc/config/loongarch/loongarch-def.cc @@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data () int_mult_di (COSTS_N_INSNS (4)), int_div_si (COSTS_N_INSNS (5)), int_div_di (COSTS_N_INSNS (5)), + movcf2gr (COSTS_N_INSNS (7)), + movgr2cf (COSTS_N_INSNS (15)), branch_cost (6), memory_latency (4) {} /* The following properties cannot be looked up directly using "cpucfg". So it is necessary to provide a default value for "unknown native" tune targets (i.e. -mtune=native while PRID does not correspond to - any known "-mtune" type). Currently all numbers are default. */ + any known "-mtune" type). */ array_tune loongarch_cpu_rtx_cost_data = - array_tune (); + array_tune () + .set (CPU_LA664, + loongarch_rtx_cost_data () + .movcf2gr_ (COSTS_N_INSNS (1)) + .movgr2cf_ (COSTS_N_INSNS (1))); /* RTX costs to use when optimizing for size. 
We use a value slightly larger than COSTS_N_INSNS (1) for all of them @@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = .int_mult_si_ (COST_COMPLEX_INSN) .int_mult_di_ (COST_COMPLEX_INSN) .int_div_si_ (COST_COMPLEX_INSN) - .int_div_di_ (COST_COMPLEX_INSN); + .int_div_di_ (COST_COMPLEX_INSN) + .movcf2gr_ (COST_COMPLEX_INSN); array_tune loongarch_cpu_issue_rate = array_tune () .set (CPU_NATIVE, 4) diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h index 4aa01c54c08d..7a75c8dd9d9a 100644 --- a/gcc/config/loongarch/loongarch-tune.h +++ b/gcc/config/loongarch/loongarch-tune.h @@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data unsigned short int_mult_di; unsigned short int_div_si; unsigned short int_div_di; + unsigned short movcf2gr; + unsigned short movgr2cf; unsigned short branch_cost; unsigned short memory_latency; @@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data return *this; } + loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr) + { + movcf2gr = _movcf2gr; + return *this; + } + + loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf) + { + movgr2cf = _movgr2cf; + return *this; + } + loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) { branch_cost = _branch_cost; @@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data memory_latency = _memory_latency; return *this; } - }; /* Costs to use when optimizing for size. */ diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index a5b1dad4e73d..5ffd06ce9be3 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -5123,29 +5123,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) OPTAB_DIRECT); } -/* Allocate a floating-point condition-code register of mode MODE. 
*/ - -static rtx -loongarch_allocate_fcc (machine_mode mode) -{ - unsigned int regno, count; - - gcc_assert (TARGET_HARD_FLOAT); - - if (mode == FCCmode) - count = 1; - else - gcc_unreachable (); - - cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); - if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST) - cfun->machine->next_fcc = 0; - - regno = FCC_REG_FIRST + cfun->machine->next_fcc; - cfun->machine->next_fcc += count; - return gen_rtx_REG (mode, regno); -} - /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ static void @@ -5260,7 +5237,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1) operands for FCMP.cond.fmt, instead a reversed condition code is required and a test for false. */ *code = NE; - *op0 = loongarch_allocate_fcc (FCCmode); + *op0 = gen_reg_rtx (FCCmode); *op1 = const0_rtx; loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); @@ -6630,7 +6607,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) enum mode_class mclass; if (mode == FCCmode) - return FCC_REG_P (regno); + return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno); size = GET_MODE_SIZE (mode); mclass = GET_MODE_CLASS (mode); @@ -6845,6 +6822,9 @@ loongarch_move_to_gpr_cost (reg_class_t from) /* MOVFR2GR, etc. */ return 4; + case FCC_REGS: + return loongarch_cost->movcf2gr; + default: return 0; } @@ -6867,6 +6847,9 @@ loongarch_move_from_gpr_cost (reg_class_t to) /* MOVGR2FR, etc. */ return 4; + case FCC_REGS: + return loongarch_cost->movgr2cf; + default: return 0; } @@ -6901,6 +6884,10 @@ loongarch_register_move_cost (machine_mode mode, reg_class_t from, if (to == dregs) return loongarch_move_to_gpr_cost (from); + /* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */ + if (from == FCC_REGS || to == FCC_REGS) + return COSTS_N_INSNS (from == to ? 2 : 1); + /* Handles cases that require a GPR temporary. 
*/ cost1 = loongarch_move_to_gpr_cost (from); if (cost1 != 0) @@ -6937,6 +6924,39 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, regno = true_regnum (x); + if (mode == FCCmode) + { + if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno)) + { + if (FCC_REG_P (regno)) + return FP_REGS; + + auto fn = in_p ? loongarch_move_from_gpr_cost + : loongarch_move_to_gpr_cost; + + if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1)) + return FP_REGS; + + return GP_REG_P (regno) ? NO_REGS : GR_REGS; + } + + if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno)) + { + auto fn = in_p ? loongarch_move_to_gpr_cost + : loongarch_move_from_gpr_cost; + + if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1)) + return FP_REGS; + + return NO_REGS; + } + + if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x)) + return GR_REGS; + + return NO_REGS; + } + if (reg_class_subset_p (rclass, FP_REGS)) { if (regno < 0 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index cb5b67aa5d9b..b48e8b535249 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -2283,11 +2283,72 @@ ;; Clear one FCC register -(define_insn "movfcc" - [(set (match_operand:FCC 0 "register_operand" "=z") - (const_int 0))] +(define_expand "movfcc" + [(set (match_operand:FCC 0 "") + (match_operand:FCC 1 ""))] + "TARGET_HARD_FLOAT" +{ + if (memory_operand (operands[0], FCCmode) + && memory_operand (operands[1], FCCmode)) + operands[1] = force_reg (FCCmode, operands[1]); +}) + +(define_insn "movfcc_internal" + [(set (match_operand:FCC 0 "nonimmediate_operand" + "=z,z,*f,*f,*r,*r,*m,*f,*r,z,*r") + (match_operand:FCC 1 "reg_or_0_operand" + "J,*f,z,*f,J*r,*m,J*r,J*r,*f,*r,z"))] + "TARGET_HARD_FLOAT" + "@ + fcmp.caf.s\t%0,$f0,$f0 + movfr2cf\t%0,%1 + movcf2fr\t%0,%1 + fmov.s\t%0,%1 + or\t%0,%z1,$r0 + ld.b\t%0,%1 + st.b\t%z1,%0 + movgr2fr.w\t%0,%1 + movfr2gr.s\t%0,%1 + movgr2cf\t%0,%1 + movcf2gr\t%0,%1" + [(set_attr "type" "move") + 
(set_attr "mode" "FCC")]) + +(define_insn "fcc_to_" + [(set (match_operand:X 0 "register_operand" "=r") + (if_then_else:X (ne (match_operand:FCC 1 "register_operand" "0") + (const_int 0)) + (const_int 1) + (const_int 0)))] + "TARGET_HARD_FLOAT" "" - "fcmp.caf.s\t%0,$f0,$f0") + [(set_attr "length" "0") + (set_attr "type" "ghost")]) + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "loongarch_fcmp_operator" + [(match_operand:ANYF 2 "register_operand") + (match_operand:ANYF 3 "register_operand")]))] + "" + { + rtx fcc = gen_reg_rtx (FCCmode); + rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), FCCmode, + operands[2], operands[3]); + + emit_insn (gen_rtx_SET (fcc, cmp)); + if (TARGET_64BIT) + { + rtx gpr = gen_reg_rtx (DImode); + emit_insn (gen_fcc_to_di (gpr, fcc)); + emit_insn (gen_rtx_SET (operands[0], + lowpart_subreg (SImode, gpr, DImode))); + } + else + emit_insn (gen_fcc_to_si (operands[0], fcc)); + + DONE; + }) ;; Conditional move instructions. 
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index 9e9ce58cb53f..83fea08315c4 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -590,6 +590,10 @@ (define_predicate "loongarch_cstore_operator" (match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu")) +(define_predicate "loongarch_fcmp_operator" + (match_code + "unordered,uneq,unlt,unle,eq,lt,le,ordered,ltgt,ne,ge,gt,unge,ungt")) + (define_predicate "small_data_pattern" (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") (match_test "loongarch_small_data_pattern_p (op)"))) diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c new file mode 100644 index 000000000000..23334a3a31fe --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mtune=la464 -mabi=lp64d" } */ +/* { dg-final { scan-assembler "movcf2fr\t\\\$f\[0-9\]+,\\\$fcc" } } */ +/* { dg-final { scan-assembler "movfr2gr\\.s\t\\\$r4" } } */ + +int +t (float a, float b) +{ + return a > b; +} diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c new file mode 100644 index 000000000000..d27c393b5edc --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=loongarch64 -mtune=la664 -mabi=lp64d" } */ +/* { dg-final { scan-assembler "movcf2gr\t\\\$r4,\\\$fcc" } } */ + +int +t (float a, float b) +{ + return a > b; +}