From 55cc4377a310f00b0790eb2ab218fe130f63a408 Mon Sep 17 00:00:00 2001 From: Stefan Schulze Frielinghaus Date: Thu, 4 Dec 2025 09:36:24 +0100 Subject: [PATCH] lra: Honor exclude start regs while reusing reloads This fixes asm-hard-reg-3.c:10:1: error: unrecognizable insn: 10 | } | ^ (insn 9 18 14 2 (parallel [ (set (reg:DI 0 ax [orig:99 x ] [99]) (asm_operands:DI ("") ("=r") 0 [ (reg:SI 0 ax [100]) (reg:DI 1 dx [105]) repeated x2 ] [ (asm_input:SI ("0") asm-hard-reg-3.c:8) (asm_input:DI ("r") asm-hard-reg-3.c:8) (asm_input:DI ("{r8}") asm-hard-reg-3.c:8) ] [] asm-hard-reg-3.c:8)) (clobber (reg:CC 17 flags)) ]) "asm-hard-reg-3.c":8:3 -1 (nil)) during RTL pass: reload During get_reload_reg() a reload register may be reused and so far exclude start hard registers were not taken into account. For the test case this means operands 2 and 3 use the same reload register which gets dx assigned, although the constraint of operand 3 refers to register r8. That in turn renders the insn unsatisfiable. A conservative approach would be to simply not reuse any reload register whenever the set of exclude start hard regs is non-empty. However, this would lead to some missed optimizations like in this example where operands 2 and 3 would land in different registers. Therefore, if both share a start hard register, still reuse the reload and refine the exclude start hard regs set. I only have a test case for inputs. However, I expect an analogous problem for outputs which is why I adapted that case, too. gcc/ChangeLog: * lra-constraints.cc (get_reload_reg): Honor exclude start regs while reusing reloads. gcc/testsuite/ChangeLog: * gcc.dg/asm-hard-reg-9.c: New test. 
--- gcc/lra-constraints.cc | 21 ++++++++++++++++++++- gcc/testsuite/gcc.dg/asm-hard-reg-9.c | 15 +++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-9.c diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index d843226c8c8..66a300bf268 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -686,7 +686,11 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, && (int) REGNO (original) >= new_regno_start && (INSN_UID (curr_insn) >= new_insn_uid_start || ira_former_scratch_p (REGNO (original))) - && in_class_p (original, rclass, &new_class, true)) + && in_class_p (original, rclass, &new_class, true) + && (exclude_start_hard_regs == nullptr + || hard_reg_set_intersect_p ( + ~lra_reg_info[REGNO (original)].exclude_start_hard_regs, + ~*exclude_start_hard_regs))) { unsigned int regno = REGNO (original); if (lra_dump_file != NULL) @@ -698,6 +702,9 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, lra_change_class (regno, new_class, ", change to", false); if (lra_dump_file != NULL) fprintf (lra_dump_file, "\n"); + if (exclude_start_hard_regs) + lra_reg_info[regno].exclude_start_hard_regs + |= *exclude_start_hard_regs; *result_reg = original; return false; } @@ -734,6 +741,18 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, if (reg == NULL_RTX || GET_CODE (reg) != SUBREG) continue; } + /* If the existing reload and this have no start hard register in + common, then skip. Otherwise update exclude_start_hard_regs. */ + if (exclude_start_hard_regs + && ! 
hard_reg_set_empty_p (*exclude_start_hard_regs)) + { + HARD_REG_SET r = lra_reg_info[regno].exclude_start_hard_regs + | *exclude_start_hard_regs; + if (hard_reg_set_empty_p (~r)) + continue; + else + lra_reg_info[regno].exclude_start_hard_regs = r; + } *result_reg = reg; if (lra_dump_file != NULL) { diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-9.c b/gcc/testsuite/gcc.dg/asm-hard-reg-9.c new file mode 100644 index 00000000000..0866cb4554a --- /dev/null +++ b/gcc/testsuite/gcc.dg/asm-hard-reg-9.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target s390*-*-* x86_64-*-* } } */ +/* { dg-options "-O2" } */ + +/* Ensure that if the reload register for operand 2 is reused for operand 3, + that exclude start hard regs coming from operand 3 are taken into account. + Otherwise a different register than r8 may be chosen rendering the insn + after LRA unsatisfiable. */ + +long +test () +{ + long x; + __asm__ ("" : "=r" (x) : "0" (1000), "r" (0l), "{r8}" (0l)); + return x; +} -- 2.47.3