When we search for a point in an insertion candidate block that has
incoming live call-clobbered regs, we look for REG_DEAD notes of
those and for an indication of the FLAGS reg becoming live.  But we
consider insns like
(insn 807 805 6 2 (parallel [
(set (subreg:SI (reg:HI 509) 0)
(lshiftrt:SI (reg:SI 514)
(const_int 16 [0x10])))
(clobber (reg:CC 17 flags))
]) "/home/packages/tmp/onednn-3.9.1+ds/src/cpu/x64/brgemm/jit_brgemm_amx_uker.cpp":1891:25 1213 {*lshrsi3_1}
(expr_list:REG_UNUSED (reg:CC 17 flags)
(expr_list:REG_DEAD (reg:SI 514)
(nil))))
as making the FLAGS_REG live, despite the REG_UNUSED note or the setter
being a CLOBBER.  The following optimizes this by in turn honoring
REG_UNUSED for FLAGS_REG, pruning it immediately again.
This reduces the required expensive iteration to other candidate BBs,
reducing compile-time for the testcase in the PR from hours to 6s.
PR target/123137
* config/i386/i386-features.cc (ix86_emit_tls_call): Improve
local FLAGS_REG liveness calculation.
* g++.dg/torture/pr124137.C: New testcase.
rtx link;
for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
- if (REG_NOTE_KIND (link) == REG_DEAD
+ if ((REG_NOTE_KIND (link) == REG_DEAD
+ || (REG_NOTE_KIND (link) == REG_UNUSED
+ && REGNO (XEXP (link, 0)) == FLAGS_REG))
&& REG_P (XEXP (link, 0)))
{
/* Mark the live caller-saved register as dead. */
--- /dev/null
+// { dg-do compile }
+// { dg-require-effective-target fpic }
+// { dg-additional-options "-fPIC" }
+
+inline int &GetErrorRef() { // hands out a reference to a function-local thread_local int
+ thread_local int err; // NOTE(review): presumably the TLS access (built with -fPIC) that the fix targets -- confirm
+ return err;
+}
+int SetError_err; // value that RegExp's constructor stores through GetErrorRef()
+bool isREG___trans_tmp_4; // written by Operand::isREG with the result it returns
+struct Operand { // bit-field record; the unnamed bit-fields only occupy space
+ int : 6;
+ int kind_ : 10;
+ int bit_ : 14;
+protected:
+ int : 11;
+public:
+ bool isREG(int bit) { // true when kind_ is non-zero and bit_ has a bit in common with `bit`
+ isREG___trans_tmp_4 = kind_ && bit_ & bit; // parses as kind_ && (bit_ & bit); result is also kept in a global
+ return isREG___trans_tmp_4;
+ }
+ int getBit() { return bit_; } // plain accessor for the bit_ field
+};
+struct Reg : Operand { // adds nothing to Operand
+} index_, fp8_to_f16_upconvert_to_vnni_reg_data; // two global Reg objects
+enum { i32e = 4 }; // mask that RegExp's constructor passes to isREG
+struct RegExp {
+ RegExp(Reg &r) { // non-explicit, so a Reg lvalue converts to RegExp implicitly
+ if (r.isREG(i32e))
+ GetErrorRef() = SetError_err; // store through the thread_local reference
+ if (r.getBit())
+ index_ = r; // copy the argument into the global index_
+ }
+};
+struct X {
+ void operator[](RegExp); // only declared in the visible testcase
+} ptr; // global X whose operator[] is invoked in the function below
+void jit_brgemm_amx_uker_base_tfp8_to_f16_upconvert_to_vnni(Reg reg_buf) {
+ RegExp __trans_tmp_3 = fp8_to_f16_upconvert_to_vnni_reg_data; // runs RegExp(Reg &) on the global Reg
+ ptr[reg_buf]; // converts reg_buf through RegExp(Reg &) again, then calls X::operator[]
+}