From: Kaz Kojima Date: Tue, 23 Jun 2026 02:26:21 +0000 (+0900) Subject: SH: Adjust fp-reg related move insns to work with LRA X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2b2e5f661dcbea7136229494148c8e0897496613;p=thirdparty%2Fgcc.git SH: Adjust fp-reg related move insns to work with LRA On SH, fp move insns usually don't support displacement addressing modes. Instead they need additional match_scratch constraints, which LRA has trouble dealing with. As a mitigation, split movsf_ie_ra into several new patterns that do not use match_scratch. For movdf constant loads, add a new sub-pattern. Use a new pattern, movsf_ie_rffr, to handle movsf multiword subregs, and disable movsf_ie_ra for moves between a reg and a subreg of SImode. gcc/ChangeLog: PR target/55212 * config/sh/predicates.md (pc_relative_load_operand): New predicate. * config/sh/sh-protos.h (sh_movsf_ie_ra_split_p): Remove. (sh_movsf_ie_y_split_p): New proto. (sh_movsf_ie_subreg_multiword_p): New proto. * config/sh/sh.cc (sh_movsf_ie_ra_split_p): Remove. (sh_movsf_ie_y_split_p): New function. (sh_movsf_ie_subreg_multiword_p): New function. (broken_move): Take movsf_ie_ra into account for fldi cases. * config/sh/sh.md (movdf_i4_F_z): New insn. (movdf): Use it when expanding. (movsf_ie_ra): Use define_insn instead of define_insn_and_split. Adjust alternatives. (movsf_ie_rffr): New insn_and_split. (movsf_ie_F_z, movsf_ie_Q_z, movsf_ie_y): New insns. (movsf): Use new patterns when expanding. --- diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index 100c0accde3..7fc1c0833b4 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -484,6 +484,12 @@ && sh_legitimate_index_p (mode, XEXP (plus0_rtx, 1), TARGET_SH2A, true); }) +;; Returns true if OP is a pc relative load operand. 
+(define_predicate "pc_relative_load_operand" + (and (match_code "mem") + (match_test "GET_MODE (op) != QImode") + (match_test "IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))"))) + ;; Returns true if OP is a valid source operand for a logical operation. (define_predicate "logical_operand" (and (match_code "subreg,reg,const_int") diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 41ab6101ae1..adf227efbba 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -102,7 +102,8 @@ extern rtx sh_find_equiv_gbr_addr (rtx_insn* cur_insn, rtx mem); extern int sh_eval_treg_value (rtx op); extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op); extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a); -extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx); +extern bool sh_movsf_ie_y_split_p (rtx, rtx); +extern bool sh_movsf_ie_subreg_multiword_p (rtx, rtx); extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool); /* Result value of sh_find_set_of_reg. */ diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc index 4bf6ed41e06..b7e65b8ca84 100644 --- a/gcc/config/sh/sh.cc +++ b/gcc/config/sh/sh.cc @@ -4831,6 +4831,7 @@ broken_move (rtx_insn *insn) we changed this to do a constant load. In that case we don't have an r0 clobber, hence we must use fldi. */ && (TARGET_FMOVD + || sh_lra_p () || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) == SCRATCH)) && REG_P (SET_DEST (pat)) @@ -11472,30 +11473,37 @@ sh_legitimize_address_displacement (rtx *offset1, rtx *offset2, return false; } -/* Return true if movsf insn should be split with an additional - register. */ +/* Return true if movsf insn should be split with an fpul register. 
*/ bool -sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2) +sh_movsf_ie_y_split_p (rtx op0, rtx op1) { - /* op0 == op1 */ - if (rtx_equal_p (op0, op1)) + /* f, r */ + if (REG_P (op0) + && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode)) return true; - /* fy, FQ, reg */ - if (GET_CODE (op1) == CONST_DOUBLE - && ! satisfies_constraint_G (op1) - && ! satisfies_constraint_H (op1) - && REG_P (op0) - && REG_P (op2)) + /* r, f */ + if (REG_P (op1) + && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode)) return true; - /* f, r, y */ - if (REG_P (op0) && FP_REGISTER_P (REGNO (op0)) - && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1)) - && REG_P (op2) && (REGNO (op2) == FPUL_REG)) + + return false; +} + +/* Return true if it moves reg from/to subreg of multiword mode. */ +bool +sh_movsf_ie_subreg_multiword_p (rtx op0, rtx op1) +{ + if (REG_P (op0) + && (SUBREG_P (op1) + && (GET_MODE (SUBREG_REG (op1)) == SCmode + || GET_MODE (SUBREG_REG (op1)) == DImode + || GET_MODE (SUBREG_REG (op1)) == TImode))) return true; - /* r, f, y */ - if (REG_P (op1) && FP_REGISTER_P (REGNO (op1)) - && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0)) - && REG_P (op2) && (REGNO (op2) == FPUL_REG)) + if (REG_P (op1) + && (SUBREG_P (op0) + && (GET_MODE (SUBREG_REG (op0)) == SCmode + || GET_MODE (SUBREG_REG (op0)) == DImode + || GET_MODE (SUBREG_REG (op0)) == TImode))) return true; return false; diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 4b12b06dcaa..fe47a015ef8 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -5664,6 +5664,22 @@ (const_string "double") (const_string "none")))]) +;; LRA will try to satisfy the constraints in match_scratch for the memory +;; displacements and it will make issues on this target. Use R0 as a scratch +;; register for the constant load. 
+(define_insn "movdf_i4_F_z" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=d") + (match_operand:DF 1 "const_double_operand" "F")) + (use (reg:SI FPSCR_MODES_REG)) + (clobber (reg:SI R0_REG))] + "TARGET_FPU_DOUBLE && sh_lra_p ()" + "#" + [(set_attr "type" "pcfload") + (set (attr "length") (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8))) + (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") + (const_string "double") + (const_string "none")))]) + ;; Moving DFmode between fp/general registers through memory ;; (the top of the stack) is faster than moving through fpul even for ;; little endian. Because the type of an instruction is important for its @@ -5803,6 +5819,14 @@ [(set (match_dup 0) (match_dup 0))] "") +(define_split + [(set (match_operand:SF 0 "register_operand") + (match_operand:SF 1 "register_operand")) + (use (reg:SI FPSCR_MODES_REG))] + "TARGET_SH2E && sh_lra_p () && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 0) (match_dup 0))]) + ;; fmovd substitute post-reload splits (define_split [(set (match_operand:DF 0 "register_operand" "") @@ -6047,6 +6071,14 @@ prepare_move_operands (operands, DFmode); if (TARGET_FPU_DOUBLE) { + if (sh_lra_p () + && (GET_CODE (operands[1]) == CONST_DOUBLE + && REG_P (operands[0]))) + { + emit_insn (gen_movdf_i4_F_z (operands[0], operands[1])); + DONE; + } + emit_insn (gen_movdf_i4 (operands[0], operands[1])); DONE; } @@ -6172,15 +6204,17 @@ (const_string "none") (const_string "none")])]) -(define_insn_and_split "movsf_ie_ra" +;; LRA will try to satisfy the constraints in match_scratch for the memory +;; displacements and that doesn't work well. Hence movsf_ie_ra is split +;; into multiple patterns below to avoid those issues while 'lra_in_progress'. 
+(define_insn "movsf_ie_ra" [(set (match_operand:SF 0 "general_movdst_operand" - "=f,r,f,f,fy,f,m, r,r,m,f,y,y,rf,r,y,<,y,y") + "=f,r,f,f,f,m, r,r,m,f,y,y,r,y,<,y,y") (match_operand:SF 1 "general_movsrc_operand" - " f,r,G,H,FQ,m,f,FQ,m,r,y,f,>,fr,y,r,y,>,y")) - (use (reg:SI FPSCR_MODES_REG)) - (clobber (match_scratch:SF 2 "=r,r,X,X,&z,r,r, X,r,r,r,r,r, y,r,r,r,r,r")) - (const_int 0)] - "TARGET_SH2E + " f,r,G,H,m,f,FQ,m,r,y,f,>,y,r,y,>,y")) + (use (reg:SI FPSCR_MODES_REG))] + "TARGET_SH2E && sh_lra_p () + && ! sh_movsf_ie_y_split_p (operands[0], operands[1]) && (arith_reg_operand (operands[0], SFmode) || fpul_operand (operands[0], SFmode) || arith_reg_operand (operands[1], SFmode) @@ -6190,7 +6224,6 @@ mov %1,%0 fldi0 %0 fldi1 %0 - # fmov.s %1,%0 fmov.s %1,%0 mov.l %1,%0 @@ -6199,31 +6232,19 @@ fsts fpul,%0 flds %1,fpul lds.l %1,%0 - # sts %1,%0 lds %1,%0 sts.l %1,%0 lds.l %1,%0 ! move optimized away" - "reload_completed - && sh_movsf_ie_ra_split_p (operands[0], operands[1], operands[2])" - [(const_int 0)] -{ - if (! 
rtx_equal_p (operands[0], operands[1])) - { - emit_insn (gen_movsf_ie (operands[2], operands[1])); - emit_insn (gen_movsf_ie (operands[0], operands[2])); - } -} - [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load, - store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil") - (set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*") + [(set_attr "type" "fmove,move,fmove,fmove,fload,fstore,pcload,load, + store,fmove,fmove,load,fpul_gp,gp_fpul,fstore,load,nil") + (set_attr "late_fp_use" "*,*,*,*,*,yes,*,*,*,*,*,*,yes,*,yes,*,*") (set_attr_alternative "length" [(const_int 2) (const_int 2) (const_int 2) (const_int 2) - (const_int 4) (if_then_else (match_operand 1 "displacement_mem_operand") (const_int 4) (const_int 2)) (if_then_else (match_operand 0 "displacement_mem_operand") @@ -6236,7 +6257,6 @@ (const_int 2) (const_int 2) (const_int 2) - (const_int 4) (const_int 2) (const_int 2) (const_int 2) @@ -6248,7 +6268,6 @@ (const_string "none") (const_string "single") (const_string "single") - (const_string "none") (if_then_else (eq_attr "fmovd" "yes") (const_string "single") (const_string "none")) (if_then_else (eq_attr "fmovd" "yes") @@ -6263,15 +6282,75 @@ (const_string "none") (const_string "none") (const_string "none") + (const_string "none")])]) + +(define_insn_and_split "movsf_ie_rffr" + [(set (match_operand:SF 0 "arith_reg_dest" "=f,r,rf") + (match_operand:SF 1 "arith_reg_operand" "f,r,fr")) + (use (reg:SI FPSCR_MODES_REG)) + (clobber (match_scratch:SF 2 "=X,X,y"))] + "TARGET_SH2E && sh_lra_p ()" + "@ + fmov %1,%0 + mov %1,%0 + #" + "reload_completed + && (FP_REGISTER_P (REGNO (operands[0])) + != FP_REGISTER_P (REGNO (operands[1])))" + [(const_int 0)] +{ + emit_insn (gen_movsf_ie_ra (operands[2], operands[1])); + emit_insn (gen_movsf_ie_ra (operands[0], operands[2])); +} + [(set_attr "type" "fmove,move,*") + (set_attr_alternative "length" + [(const_int 2) + (const_int 2) + (const_int 4)]) + (set_attr_alternative "fp_mode" + 
[(if_then_else (eq_attr "fmovd" "yes") + (const_string "single") (const_string "none")) (const_string "none") (const_string "none")])]) +(define_insn "movsf_ie_F_z" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (match_operand:SF 1 "const_double_operand" "F")) + (use (reg:SI FPSCR_MODES_REG)) + (clobber (reg:SI R0_REG))] + "TARGET_SH2E && sh_lra_p ()" + "#" + [(set_attr "type" "pcfload") + (set_attr "length" "4")]) + +(define_insn "movsf_ie_Q_z" + [(set (match_operand:SF 0 "fpul_operand" "=y") + (match_operand:SF 1 "pc_relative_load_operand" "Q")) + (use (reg:SI FPSCR_MODES_REG)) + (clobber (reg:SI R0_REG))] + "TARGET_SH2E && sh_lra_p ()" + "#" + [(set_attr "type" "pcfload") + (set_attr "length" "4")]) + +(define_insn "movsf_ie_y" + [(set (match_operand:SF 0 "arith_reg_dest" "=fr") + (match_operand:SF 1 "arith_reg_operand" "rf")) + (use (reg:SI FPSCR_MODES_REG)) + (clobber (reg:SI FPUL_REG))] + "TARGET_SH2E && sh_lra_p ()" + "#" + [(set_attr "type" "*") + (set_attr "length" "4")]) + (define_split [(set (match_operand:SF 0 "register_operand" "") (match_operand:SF 1 "register_operand" "")) (use (reg:SI FPSCR_MODES_REG)) (clobber (reg:SI FPUL_REG))] - "TARGET_SH1" + "TARGET_SH1 + && ! fpul_operand (operands[0], SFmode) + && ! fpul_operand (operands[1], SFmode)" [(parallel [(set (reg:SF FPUL_REG) (match_dup 1)) (use (reg:SI FPSCR_MODES_REG)) (clobber (scratch:SI))]) @@ -6288,11 +6367,42 @@ prepare_move_operands (operands, SFmode); if (TARGET_SH2E) { - if (lra_in_progress) + if (sh_lra_p ()) { if (GET_CODE (operands[0]) == SCRATCH) DONE; - emit_insn (gen_movsf_ie_ra (operands[0], operands[1])); + /* reg from/to multiword subreg may be splitted to several reg from/to + subreg of SImode by subreg1 pass. This confuses our splitted + movsf logic for LRA and will end up in bad code or ICE. Use a special + pattern so that LRA can optimize this case. */ + if (! lra_in_progress && ! 
reload_completed + && sh_movsf_ie_subreg_multiword_p (operands[0], operands[1])) + { + emit_insn (gen_movsf_ie_rffr (operands[0], operands[1])); + DONE; + } + if (GET_CODE (operands[1]) == CONST_DOUBLE + && ! satisfies_constraint_G (operands[1]) + && ! satisfies_constraint_H (operands[1]) + && REG_P (operands[0])) + { + if (lra_in_progress) + emit_insn (gen_movsf_ie (operands[0], operands[1])); + else + emit_insn (gen_movsf_ie_F_z (operands[0], operands[1])); + } + else if (REG_P (operands[0]) && REGNO (operands[0]) == FPUL_REG + && satisfies_constraint_Q (operands[1])) + emit_insn (gen_movsf_ie_Q_z (operands[0], operands[1])); + else if (sh_movsf_ie_y_split_p (operands[0], operands[1])) + { + if (lra_in_progress) + emit_insn (gen_movsf_ie (operands[0], operands[1])); + else + emit_insn (gen_movsf_ie_y (operands[0], operands[1])); + } + else + emit_insn (gen_movsf_ie_ra (operands[0], operands[1])); DONE; }