From 5400778f69d19a94017561c7de02510d9c0899e6 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Thu, 11 Jan 2024 10:16:24 +0000 Subject: [PATCH] aarch64: Fix dwarf2cfi ICEs due to recent CFI note changes [PR113077] In r14-6604-gd7ee988c491cde43d04fe25f2b3dbad9d85ded45 we changed the CFI notes attached to callee saves (in aarch64_save_callee_saves). That patch changed the ldp/stp representation to use unspecs instead of PARALLEL moves. This meant that we needed to attach CFI notes to all frame-related pair saves such that dwarf2cfi could still emit the appropriate CFI (it cannot interpret the unspecs directly). The patch also attached REG_CFA_OFFSET notes to individual saves so that the ldp/stp pass could easily preserve them when forming stps. In that change I chose to use REG_CFA_OFFSET, but as the PR shows, that choice was problematic in that REG_CFA_OFFSET requires the attached store to be expressed in terms of the current CFA register at all times. This means that even scheduling of frame-related insns can break this invariant, leading to ICEs in dwarf2cfi. The old behaviour (before that change) allowed dwarf2cfi to interpret the RTL directly for sp-relative saves. This change restores that behaviour by using REG_FRAME_RELATED_EXPR instead of REG_CFA_OFFSET. REG_FRAME_RELATED_EXPR effectively just gives a different pattern for dwarf2cfi to look at instead of the main insn pattern. That allows us to attach the old-style PARALLEL move representation in a REG_FRAME_RELATED_EXPR note and means we are free to always express the save addresses in terms of the stack pointer. Since the ldp/stp fusion pass can combine frame-related stores, this patch also updates it to preserve REG_FRAME_RELATED_EXPR notes, and additionally gives it the ability to synthesize those notes when combining sp-relative saves into an stp (the latter always needs a note due to the unspec representation, the former does not). gcc/ChangeLog: PR target/113077 * config/aarch64/aarch64-ldp-fusion.cc (filter_notes): Add fr_expr param to extract REG_FRAME_RELATED_EXPR notes. (combine_reg_notes): Handle REG_FRAME_RELATED_EXPR notes, and synthesize these if needed. Update caller ... (ldp_bb_info::fuse_pair): ... here. (ldp_bb_info::try_fuse_pair): Punt if either insn has writeback and either insn is frame-related. (find_trailing_add): Punt on frame-related insns. * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use REG_FRAME_RELATED_EXPR instead of REG_CFA_OFFSET. gcc/testsuite/ChangeLog: PR target/113077 * gcc.target/aarch64/pr113077.c: New test. --- gcc/config/aarch64/aarch64-ldp-fusion.cc | 76 +++++++++++++++++++-- gcc/config/aarch64/aarch64.cc | 71 ++++++++++--------- gcc/testsuite/gcc.target/aarch64/pr113077.c | 11 +++ 3 files changed, 120 insertions(+), 38 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113077.c diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc b/gcc/config/aarch64/aarch64-ldp-fusion.cc index 2fe1b1d4d84e..689a8c884bd7 100644 --- a/gcc/config/aarch64/aarch64-ldp-fusion.cc +++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc @@ -904,9 +904,11 @@ aarch64_operand_mode_for_pair_mode (machine_mode mode) // Go through the reg notes rooted at NOTE, dropping those that we should drop, // and preserving those that we want to keep by prepending them to (and // returning) RESULT. EH_REGION is used to make sure we have at most one -// REG_EH_REGION note in the resulting list. +// REG_EH_REGION note in the resulting list. FR_EXPR is used to return any +// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in +// combine_reg_notes. static rtx -filter_notes (rtx note, rtx result, bool *eh_region) +filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr) { for (; note; note = XEXP (note, 1)) { @@ -940,6 +942,10 @@ filter_notes (rtx note, rtx result, bool *eh_region) copy_rtx (XEXP (note, 0)), result); break; + case REG_FRAME_RELATED_EXPR: + gcc_assert (!*fr_expr); + *fr_expr = copy_rtx (XEXP (note, 0)); + break; default: // Unexpected REG_NOTE kind. gcc_unreachable (); @@ -950,14 +956,49 @@ filter_notes (rtx note, rtx result, bool *eh_region) } // Return the notes that should be attached to a combination of I1 and I2, where -// *I1 < *I2. +// *I1 < *I2. LOAD_P is true for loads. static rtx -combine_reg_notes (insn_info *i1, insn_info *i2) +combine_reg_notes (insn_info *i1, insn_info *i2, bool load_p) { + // Temporary storage for REG_FRAME_RELATED_EXPR notes. + rtx fr_expr[2] = {}; + bool found_eh_region = false; rtx result = NULL_RTX; - result = filter_notes (REG_NOTES (i2->rtl ()), result, &found_eh_region); - return filter_notes (REG_NOTES (i1->rtl ()), result, &found_eh_region); + result = filter_notes (REG_NOTES (i2->rtl ()), result, + &found_eh_region, fr_expr); + result = filter_notes (REG_NOTES (i1->rtl ()), result, + &found_eh_region, fr_expr + 1); + + if (!load_p) + { + // Simple frame-related sp-relative saves don't need CFI notes, but when + // we combine them into an stp we will need a CFI note as dwarf2cfi can't + // interpret the unspec pair representation directly. + if (RTX_FRAME_RELATED_P (i1->rtl ()) && !fr_expr[0]) + fr_expr[0] = copy_rtx (PATTERN (i1->rtl ())); + if (RTX_FRAME_RELATED_P (i2->rtl ()) && !fr_expr[1]) + fr_expr[1] = copy_rtx (PATTERN (i2->rtl ())); + } + + rtx fr_pat = NULL_RTX; + if (fr_expr[0] && fr_expr[1]) + { + // Combining two frame-related insns, need to construct + // a REG_FRAME_RELATED_EXPR note which represents the combined + // operation. + RTX_FRAME_RELATED_P (fr_expr[1]) = 1; + fr_pat = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, fr_expr[0], fr_expr[1])); + } + else + fr_pat = fr_expr[0] ? fr_expr[0] : fr_expr[1]; + + if (fr_pat) + result = alloc_reg_note (REG_FRAME_RELATED_EXPR, + fr_pat, result); + + return result; } // Given two memory accesses in PATS, at least one of which is of a @@ -1049,6 +1090,14 @@ find_trailing_add (insn_info *insns[2], poly_int64 initial_offset, unsigned access_size) { + // Punt on frame-related insns, it is better to be conservative and + // not try to form writeback pairs here, and means we don't have to + // worry about the writeback case in forming REG_FRAME_RELATED_EXPR + // notes (see combine_reg_notes). + if ((insns[0] && RTX_FRAME_RELATED_P (insns[0]->rtl ())) + || RTX_FRAME_RELATED_P (insns[1]->rtl ())) + return nullptr; + insn_info *pair_dst = pair_range.singleton (); gcc_assert (pair_dst); @@ -1380,7 +1429,7 @@ ldp_bb_info::fuse_pair (bool load_p, return false; } - rtx reg_notes = combine_reg_notes (first, second); + rtx reg_notes = combine_reg_notes (first, second, load_p); rtx pair_pat; if (writeback_effect) @@ -1987,6 +2036,19 @@ ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size, return false; } + // Punt on frame-related insns with writeback. We probably won't see + // these in practice, but this is conservative and ensures we don't + // have to worry about these later on. + if (writeback && (RTX_FRAME_RELATED_P (i1->rtl ()) + || RTX_FRAME_RELATED_P (i2->rtl ()))) + { + if (dump_file) + fprintf (dump_file, + "rejecting pair (%d,%d): frame-related insn with writeback\n", + i1->uid (), i2->uid ()); + return false; + } + rtx *ignore = &XEXP (pats[1], load_p); for (auto use : insns[1]->uses ()) if (!use->is_mem () diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index a5a6b52730d6..32c7317f3600 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -8465,7 +8465,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode)); } rtx base_rtx = stack_pointer_rtx; - poly_int64 cfa_offset = offset; + poly_int64 sp_offset = offset; HOST_WIDE_INT const_offset; if (mode == VNx2DImode && BYTES_BIG_ENDIAN) @@ -8490,17 +8490,12 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, offset -= fp_offset; } rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + rtx cfi_mem = gen_frame_mem (mode, plus_constant (Pmode, + stack_pointer_rtx, + sp_offset)); + rtx cfi_set = gen_rtx_SET (cfi_mem, reg); + bool need_cfi_note_p = (base_rtx != stack_pointer_rtx); - rtx cfa_base = stack_pointer_rtx; - if (hard_fp_valid_p && frame_pointer_needed) - { - cfa_base = hard_frame_pointer_rtx; - cfa_offset += (bytes_below_sp - frame.bytes_below_hard_fp); - } - - rtx cfa_mem = gen_frame_mem (mode, - plus_constant (Pmode, - cfa_base, cfa_offset)); unsigned int regno2; if (!aarch64_sve_mode_p (mode) && reg == move_src @@ -8514,34 +8509,48 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, offset += GET_MODE_SIZE (mode); insn = emit_insn (aarch64_gen_store_pair (mem, reg, reg2)); - /* The first part of a frame-related parallel insn is - always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. */ + rtx cfi_mem2 + = gen_frame_mem (mode, + plus_constant (Pmode, + stack_pointer_rtx, + sp_offset + GET_MODE_SIZE (mode))); + rtx cfi_set2 = gen_rtx_SET (cfi_mem2, reg2); + + /* The first part of a frame-related parallel insn is always + assumed to be relevant to the frame calculations; + subsequent parts, are only frame-related if + explicitly marked. */ if (aarch64_emit_cfi_for_reg_p (regno2)) - { - const auto off = cfa_offset + GET_MODE_SIZE (mode); - rtx cfa_mem2 = gen_frame_mem (mode, - plus_constant (Pmode, - cfa_base, - off)); - add_reg_note (insn, REG_CFA_OFFSET, - gen_rtx_SET (cfa_mem2, reg2)); - } + RTX_FRAME_RELATED_P (cfi_set2) = 1; + + /* Add a REG_FRAME_RELATED_EXPR note since the unspec + representation of stp cannot be understood directly by + dwarf2cfi. */ + rtx par = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, cfi_set, cfi_set2)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, par); regno = regno2; ++i; } - else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, move_src)); - else if (aarch64_sve_mode_p (mode)) - insn = emit_insn (gen_rtx_SET (mem, move_src)); else - insn = emit_move_insn (mem, move_src); + { + if (mode == VNx2DImode && BYTES_BIG_ENDIAN) + { + insn = emit_insn (gen_aarch64_pred_mov (mode, mem, + ptrue, move_src)); + need_cfi_note_p = true; + } + else if (aarch64_sve_mode_p (mode)) + insn = emit_insn (gen_rtx_SET (mem, move_src)); + else + insn = emit_move_insn (mem, move_src); + + if (frame_related_p && (need_cfi_note_p || move_src != reg)) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, cfi_set); + } RTX_FRAME_RELATED_P (insn) = frame_related_p; - if (frame_related_p) - add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (cfa_mem, reg)); /* Emit a fake instruction to indicate that the VG save slot has been initialized. */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr113077.c b/gcc/testsuite/gcc.target/aarch64/pr113077.c new file mode 100644 index 000000000000..dca202bd2baa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr113077.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2 -fstack-protector-strong -fstack-clash-protection" } */ +void add_key(const void *payload); +void act_keyctl_test(void) { + char buf[1030 * 1024]; + int i = 0; + for (i = 0; i < sizeof(buf); i++) + { + add_key(buf); + } +} -- 2.47.2