]> git.ipfire.org Git - thirdparty/binutils-gdb.git/commitdiff
LoongArch: Allow to relax instructions into NOPs after handling alignment
authorWANG Xuerui <git@xen0n.name>
Sun, 6 Jul 2025 01:06:20 +0000 (09:06 +0800)
committercailulu <cailulu@loongson.cn>
Mon, 7 Jul 2025 10:09:20 +0000 (18:09 +0800)
Right now, LoongArch linker relaxation is 2-pass, since after alignment
is done, byte deletion can no longer happen. However, as the alignment
pass also shrinks text sections, new relaxation chances may well be
created after alignment is done. Although at this point we can no longer
delete unused instructions without disturbing alignment, we can still
replace them with NOPs; popular LoongArch micro-architectures can
eliminate NOPs during execution, so we can expect a (very) slight
performance improvement from those late-created relaxation chances.

To achieve this, the number of relax passes is raised to 3 for
LoongArch, and every relaxation handler except loongarch_relax_align is
migrated to a new helper, loongarch_relax_delete_or_nop, which either
deletes bytes or fills the bytes to be "deleted" with NOPs, depending on
whether the containing section has already undergone alignment. Also,
since no byte can be deleted from a section that has already undergone
alignment, the pending_delete_ops structure is no longer allocated when
relaxing such a section, and loongarch_calc_relaxed_addr(x) degrades to
the trivial "return x" in this case.

In addition, when calculating distances to symbols, an extra segment
alignment previously had to be taken into account, because alignment may
increase the distance between sites. However, in the newly added 3rd
pass, code size can no longer increase for "closed" sections, so we can
skip the adjustment for them to allow for a few more relaxation chances.

A simple way to roughly measure this change's effectiveness is to check
how many pcalau12i + addi.d pairs are relaxed into pcaddi's. Taking a
Firefox 140.0.2 test build of mine as an example:

Before: 47842 pcaddi's in libxul.so
After: 48089

This is a 0.5% increase (247 more pcaddi's), which is kind of acceptable
for a peephole optimization like this; 9 of the additional pcaddi's are
due to the relaxed symbol distance treatment.

Signed-off-by: WANG Xuerui <git@xen0n.name>
bfd/elfnn-loongarch.c
ld/emultempl/loongarchelf.em
ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp
ld/testsuite/ld-loongarch-elf/relax-after-alignment.d [new file with mode: 0644]
ld/testsuite/ld-loongarch-elf/relax-after-alignment.s [new file with mode: 0644]

index a480a7098978f28ece34dbf4e98e2815916d92fa..c75f5206f2e05cb30a80cb697817c520ec828313 100644 (file)
@@ -173,6 +173,10 @@ loongarch_elf_new_section_hook (bfd *abfd, asection *sec)
 #define loongarch_elf_hash_table(p)                                    \
     ((struct loongarch_elf_link_hash_table *) ((p)->hash))             \
 
+/* During linker relaxation, indicates whether the section has already
+   undergone alignment processing and no more byte deletion is permitted.
+   This is a named alias for the section's sec_flg0 flag; it expands to an
+   lvalue, so it can be both tested and assigned.  */
+#define loongarch_sec_closed_for_deletion(sec) ((sec)->sec_flg0)
+
 #define MINUS_ONE ((bfd_vma) 0 - 1)
 
 #define sec_addr(sec) ((sec)->output_section->vma + (sec)->output_offset)
@@ -4789,7 +4793,10 @@ loongarch_calc_relaxed_addr (struct bfd_link_info *info, bfd_vma offset)
   struct pending_delete_op *op;
   splay_tree_node node;
 
-  BFD_ASSERT (pdops != NULL);
+  if (!pdops)
+    /* Currently this means we are past the stages where byte deletion could
+       possibly happen.  */
+    return offset;
 
   /* Find the op that starts just before the given address.  */
   node = splay_tree_predecessor (pdops, (splay_tree_key)offset);
@@ -4814,9 +4821,9 @@ loongarch_calc_relaxed_addr (struct bfd_link_info *info, bfd_vma offset)
 
 static void
 loongarch_relax_delete_bytes (bfd *abfd,
-                         bfd_vma addr,
-                         size_t count,
-                         struct bfd_link_info *link_info)
+                             bfd_vma addr,
+                             size_t count,
+                             struct bfd_link_info *link_info)
 {
   struct loongarch_elf_link_hash_table *htab
       = loongarch_elf_hash_table (link_info);
@@ -4867,6 +4874,34 @@ loongarch_relax_delete_bytes (bfd *abfd,
     }
 }
 
+static void
+loongarch_relax_delete_or_nop (bfd *abfd,
+                              asection *sec,
+                              bfd_vma addr,
+                              size_t count,
+                              struct bfd_link_info *link_info)
+{
+  struct bfd_elf_section_data *data = elf_section_data (sec);
+  bfd_byte *contents = data->this_hdr.contents;
+
+  BFD_ASSERT (count % 4 == 0);
+
+  if (!loongarch_sec_closed_for_deletion (sec))
+    {
+      /* Deletions are still possible within the section: SEC has not been
+         marked closed for deletion, so pass the COUNT bytes starting at
+         ADDR on to loongarch_relax_delete_bytes and write no NOPs.  */
+      loongarch_relax_delete_bytes (abfd, addr, count, link_info);
+      return;
+    }
+
+  /* We can no longer delete bytes in the section after enforcing alignment.
+     But as the resulting shrinkage may open up a few more relaxation chances,
+     allowing unnecessary instructions to be replaced with NOPs instead of
+     being removed altogether may still benefit performance to a lesser
+     extent.
+
+     So overwrite the COUNT bytes at offset ADDR of the section contents
+     with NOPs, one 32-bit instruction at a time; the section size does not
+     change.  COUNT is a multiple of 4 (see the assertion above), so the
+     loop counter reaches exactly zero.  */
+  for (; count; addr += 4, count -= 4)
+    bfd_put (32, abfd, LARCH_NOP, contents + addr);
+}
+
 static void
 loongarch_relax_perform_deletes (bfd *abfd, asection *sec,
                                 struct bfd_link_info *link_info)
@@ -5135,7 +5170,7 @@ loongarch_tls_perform_trans (bfd *abfd, asection *sec,
        bfd_put (32, abfd, LARCH_NOP, contents + rel->r_offset);
        /* link with -relax option will delete NOP.  */
        if (!info->disable_target_specific_optimizations)
-         loongarch_relax_delete_bytes (abfd, rel->r_offset, 4, info);
+         loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset, 4, info);
        return true;
 
       case R_LARCH_TLS_IE_PC_HI20:
@@ -5250,7 +5285,7 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, asection *sym_sec,
            if (symval < 0x800)
              {
                rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE);
-               loongarch_relax_delete_bytes (abfd, rel->r_offset,
+               loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset,
                    4, link_info);
              }
            break;
@@ -5275,8 +5310,8 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, asection *sym_sec,
          case R_LARCH_TLS_LE64_LO20:
          case R_LARCH_TLS_LE64_HI12:
            rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE);
-           loongarch_relax_delete_bytes (abfd, rel->r_offset,
-                                         4, link_info);
+           loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset,
+                                          4, link_info);
            break;
 
          case R_LARCH_TLS_LE_LO12:
@@ -5339,17 +5374,22 @@ loongarch_relax_pcala_addi (bfd *abfd, asection *sec, asection *sym_sec,
     symval = sec_addr (sec)
             + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec));
 
-  /* If pc and symbol not in the same segment, add/sub segment alignment.  */
-  if (!loongarch_two_sections_in_same_segment (info->output_bfd,
-                                              sec->output_section,
-                                              sym_sec->output_section))
-    max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
-                                                       : max_alignment;
-
-  if (symval > pc)
-    pc -= (max_alignment > 4 ? max_alignment : 0);
-  else if (symval < pc)
-    pc += (max_alignment > 4 ? max_alignment : 0);
+  /* If pc and symbol not in the same segment, add/sub segment alignment if the
+     section has not undergone alignment processing because distances may grow
+     after alignment.  */
+  if (!loongarch_sec_closed_for_deletion (sec))
+    {
+      if (!loongarch_two_sections_in_same_segment (info->output_bfd,
+                                                  sec->output_section,
+                                                  sym_sec->output_section))
+       max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
+                                                         : max_alignment;
+
+      if (symval > pc)
+       pc -= (max_alignment > 4 ? max_alignment : 0);
+      else if (symval < pc)
+       pc += (max_alignment > 4 ? max_alignment : 0);
+    }
 
   const uint32_t pcaddi = LARCH_OP_PCADDI;
 
@@ -5376,7 +5416,7 @@ loongarch_relax_pcala_addi (bfd *abfd, asection *sec, asection *sym_sec,
                                 R_LARCH_PCREL20_S2);
   rel_lo->r_info = ELFNN_R_INFO (0, R_LARCH_NONE);
 
-  loongarch_relax_delete_bytes (abfd, rel_lo->r_offset, 4, info);
+  loongarch_relax_delete_or_nop (abfd, sec, rel_lo->r_offset, 4, info);
 
   return true;
 }
@@ -5404,17 +5444,22 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec,
     symval = sec_addr (sec)
             + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec));
 
-  /* If pc and symbol not in the same segment, add/sub segment alignment.  */
-  if (!loongarch_two_sections_in_same_segment (info->output_bfd,
-                                              sec->output_section,
-                                              sym_sec->output_section))
-    max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
-                                                       : max_alignment;
-
-  if (symval > pc)
-    pc -= (max_alignment > 4 ? max_alignment : 0);
-  else if (symval < pc)
-    pc += (max_alignment > 4 ? max_alignment : 0);
+  /* If pc and symbol not in the same segment, add/sub segment alignment if the
+     section has not undergone alignment processing because distances may grow
+     after alignment.  */
+  if (!loongarch_sec_closed_for_deletion (sec))
+    {
+      if (!loongarch_two_sections_in_same_segment (info->output_bfd,
+                                                  sec->output_section,
+                                                  sym_sec->output_section))
+       max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
+                                                         : max_alignment;
+
+      if (symval > pc)
+       pc -= (max_alignment > 4 ? max_alignment : 0);
+      else if (symval < pc)
+       pc += (max_alignment > 4 ? max_alignment : 0);
+    }
 
   /* Is pcalau12i + addi.d insns?  */
   if (!LARCH_INSN_JIRL (jirl)
@@ -5436,7 +5481,7 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec,
   /* Adjust relocations.  */
   rel->r_info = ELFNN_R_INFO (ELFNN_R_SYM (rel->r_info), R_LARCH_B26);
   /* Delete jirl instruction.  */
-  loongarch_relax_delete_bytes (abfd, rel->r_offset + 4, 4, info);
+  loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset + 4, 4, info);
   return true;
 }
 
@@ -5468,17 +5513,22 @@ loongarch_relax_pcala_ld (bfd *abfd, asection *sec,
     symval = sec_addr (sec)
             + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec));
 
-  /* If pc and symbol not in the same segment, add/sub segment alignment.  */
-  if (!loongarch_two_sections_in_same_segment (info->output_bfd,
-                                              sec->output_section,
-                                              sym_sec->output_section))
-    max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
-                                                       : max_alignment;
-
-  if (symval > pc)
-    pc -= (max_alignment > 4 ? max_alignment : 0);
-  else if (symval < pc)
-    pc += (max_alignment > 4 ? max_alignment : 0);
+  /* If pc and symbol not in the same segment, add/sub segment alignment if the
+     section has not undergone alignment processing because distances may grow
+     after alignment.  */
+  if (!loongarch_sec_closed_for_deletion (sec))
+    {
+      if (!loongarch_two_sections_in_same_segment (info->output_bfd,
+                                                  sec->output_section,
+                                                  sym_sec->output_section))
+       max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
+                                                         : max_alignment;
+
+      if (symval > pc)
+       pc -= (max_alignment > 4 ? max_alignment : 0);
+      else if (symval < pc)
+       pc += (max_alignment > 4 ? max_alignment : 0);
+    }
 
   if ((ELFNN_R_TYPE (rel_lo->r_info) != R_LARCH_GOT_PC_LO12)
       || (LARCH_GET_RD (ld) != rd)
@@ -5511,8 +5561,9 @@ bfd_elfNN_loongarch_set_data_segment_info (struct bfd_link_info *info,
     loongarch_elf_hash_table (info)->data_segment_phase = data_segment_phase;
 }
 
-/* Implement R_LARCH_ALIGN by deleting excess alignment NOPs.
-   Once we've handled an R_LARCH_ALIGN, we can't relax anything else.  */
+/* Honor R_LARCH_ALIGN requests by deleting excess alignment NOPs.
+   Once we've handled an R_LARCH_ALIGN, we can't relax anything else by deleting
+   bytes, or alignment will be disrupted.  */
 static bool
 loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec,
                        Elf_Internal_Rela *rel,
@@ -5553,9 +5604,9 @@ loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec,
       return false;
     }
 
-  /* Once we've handled an R_LARCH_ALIGN in a section,
-     we can't relax anything else in this section.  */
-  sec->sec_flg0 = true;
+  /* Once we've handled an R_LARCH_ALIGN in a section, we can't relax anything
+     else by deleting bytes, or alignment will be disrupted.  */
+  loongarch_sec_closed_for_deletion (sec) = true;
   rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE);
 
   /* If skipping more bytes than the specified maximum,
@@ -5600,17 +5651,22 @@ loongarch_relax_tls_ld_gd_desc (bfd *abfd, asection *sec, asection *sym_sec,
     symval = sec_addr (sec)
             + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec));
 
-  /* If pc and symbol not in the same segment, add/sub segment alignment.  */
-  if (!loongarch_two_sections_in_same_segment (info->output_bfd,
-                                              sec->output_section,
-                                              sym_sec->output_section))
-    max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
-                                                       : max_alignment;
-
-  if (symval > pc)
-    pc -= (max_alignment > 4 ? max_alignment : 0);
-  else if (symval < pc)
-    pc += (max_alignment > 4 ? max_alignment : 0);
+  /* If pc and symbol not in the same segment, add/sub segment alignment if the
+     section has not undergone alignment processing because distances may grow
+     after alignment.  */
+  if (!loongarch_sec_closed_for_deletion (sec))
+    {
+      if (!loongarch_two_sections_in_same_segment (info->output_bfd,
+                                                  sec->output_section,
+                                                  sym_sec->output_section))
+       max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize
+                                                         : max_alignment;
+
+      if (symval > pc)
+       pc -= (max_alignment > 4 ? max_alignment : 0);
+      else if (symval < pc)
+       pc += (max_alignment > 4 ? max_alignment : 0);
+    }
 
   const uint32_t pcaddi = LARCH_OP_PCADDI;
 
@@ -5653,7 +5709,7 @@ loongarch_relax_tls_ld_gd_desc (bfd *abfd, asection *sec, asection *sym_sec,
     }
   rel_lo->r_info = ELFNN_R_INFO (0, R_LARCH_NONE);
 
-  loongarch_relax_delete_bytes (abfd, rel_lo->r_offset, 4, info);
+  loongarch_relax_delete_or_nop (abfd, sec, rel_lo->r_offset, 4, info);
 
   return true;
 }
@@ -5697,15 +5753,25 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec,
   if (htab->layout_mutating_for_relr)
     return true;
 
+  /* Definition of LoongArch linker relaxation passes:
+
+     - Pass 0: relaxes everything except R_LARCH_ALIGN, byte deletions are
+              performed; skipped if disable_target_specific_optimizations.
+     - Pass 1: handles alignment, byte deletions are performed.  Sections with
+              R_LARCH_ALIGN relocations are marked closed for further byte
+              deletion in order to not disturb alignment.  This pass is NOT
+              skipped even if disable_target_specific_optimizations is true.
+     - Pass 2: identical to Pass 0, but replacing relaxed insns with NOP in case
+              the containing section is closed for deletion; skip condition
+              also same as Pass 0.  */
+  bool is_alignment_pass = info->relax_pass == 1;
   if (bfd_link_relocatable (info)
-      || sec->sec_flg0
       || sec->reloc_count == 0
       || (sec->flags & SEC_RELOC) == 0
       || (sec->flags & SEC_HAS_CONTENTS) == 0
       /* The exp_seg_relro_adjust is enum phase_enum (0x4).  */
       || *(htab->data_segment_phase) == 4
-      || (info->disable_target_specific_optimizations
-         && info->relax_pass == 0))
+      || (info->disable_target_specific_optimizations && !is_alignment_pass))
     return true;
 
   struct bfd_elf_section_data *data = elf_section_data (sec);
@@ -5741,7 +5807,10 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec,
       htab->max_alignment = max_alignment;
     }
 
-  splay_tree pdops = pending_delete_ops_new (abfd);
+  splay_tree pdops = NULL;
+  if (!loongarch_sec_closed_for_deletion (sec))
+    pdops = pending_delete_ops_new (abfd);
+
   htab->pending_delete_ops = pdops;
 
   for (unsigned int i = 0; i < sec->reloc_count; i++)
@@ -5783,7 +5852,13 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec,
        }
 
       relax_func_t relax_func = NULL;
-      if (info->relax_pass == 0)
+      if (is_alignment_pass)
+       {
+         if (r_type != R_LARCH_ALIGN)
+           continue;
+         relax_func = loongarch_relax_align;
+       }
+      else
        {
          switch (r_type)
            {
@@ -5837,10 +5912,6 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec,
                continue;
            }
        }
-      else if (info->relax_pass == 1 && r_type == R_LARCH_ALIGN)
-       relax_func = loongarch_relax_align;
-      else
-       continue;
 
       /* Four kind of relocations:
         Normal: symval is the symbol address.
@@ -5979,9 +6050,12 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec,
                                    info, again, max_alignment);
     }
 
-  loongarch_relax_perform_deletes (abfd, sec, info);
-  htab->pending_delete_ops = NULL;
-  splay_tree_delete (pdops);
+  if (pdops)
+    {
+      loongarch_relax_perform_deletes (abfd, sec, info);
+      htab->pending_delete_ops = NULL;
+      splay_tree_delete (pdops);
+    }
 
   return true;
 }
index 928fd83ab15e4959157ec124056624185b657961..517ece1610544beabba16a82e719372a45ad4812 100644 (file)
@@ -58,7 +58,7 @@ larch_elf_before_allocation (void)
        ENABLE_RELAXATION;
     }
 
-  link_info.relax_pass = 2;
+  link_info.relax_pass = 3;
 }
 
 static void
index 2f09a69befb820a0c586ecb757461c4363ae8e9d..e23cdc87161077d20014ac5f59410f0f928c5cd9 100644 (file)
@@ -45,6 +45,7 @@ if [istarget "loongarch64-*-*"] {
     run_dump_test "underflow_s_5_20"
     run_dump_test "tls-le-norelax"
     run_dump_test "tls-le-relax"
+    run_dump_test "relax-after-alignment"
     run_dump_test "relax-medium-call"
     run_dump_test "relax-medium-call-1"
     run_dump_test "check_got_relax"
diff --git a/ld/testsuite/ld-loongarch-elf/relax-after-alignment.d b/ld/testsuite/ld-loongarch-elf/relax-after-alignment.d
new file mode 100644 (file)
index 0000000..844c518
--- /dev/null
@@ -0,0 +1,30 @@
+#name: additional relaxation chances after alignment processing
+#as:
+#ld: --defsym _start=0
+#objdump: -d --no-show-raw-insn
+
+.*:\s+file format .*
+
+
+Disassembly of section \.text:
+
+0000000120000400 <before>:
+\s*120000400:\s+pcalau12i\s+\$t0, 512
+\s*[0-9a-f]+:\s+addi\.d\s+\$t0, \$t0, 1024
+\s*[0-9a-f]+:\s+pcaddi\s+\$t0, 524286
+\s*[0-9a-f]+:\s+nop\s*
+\s*\.\.\.
+\s*120000c00:\s+pcaddi\s+\$t0, 523776
+\s*\.\.\.
+
+0000000120200400 <target>:
+\s*120200400:\s+break\s+0x0
+
+0000000120200404 <after>:
+\s*\.\.\.
+\s*1203ffbfc:\s+pcaddi\s+\$t0, -523775
+\s*\.\.\.
+\s*120400400:\s+pcaddi\s+\$t0, -524288
+\s*[0-9a-f]+:\s+nop\s*
+\s*[0-9a-f]+:\s+pcalau12i\s+\$t0, -512
+\s*[0-9a-f]+:\s+addi\.d\s+\$t0, \$t0, 1024
diff --git a/ld/testsuite/ld-loongarch-elf/relax-after-alignment.s b/ld/testsuite/ld-loongarch-elf/relax-after-alignment.s
new file mode 100644 (file)
index 0000000..24d29ec
--- /dev/null
@@ -0,0 +1,49 @@
+# 0x0 pre-relax, 0x400 post-relax
+# all addresses are additionally offset by 0x120000000 without `ld -shared`
+before:
+    la.pcrel $t0, target  # too far; should stay as pcalau12i + addi.d
+    la.pcrel $t0, target  # furthest reach of relax pass 2
+
+# 0x10 pre-relax, 0x410 post-relax
+.rept 508
+.word 0
+.endr
+
+# 0x800 pre-relax, 0xc00 post-relax
+# 255 nops + R_LARCH_ALIGN before relaxation
+# none of the aligning nops should remain after relax pass 1
+.p2align 10
+
+# 0xbfc pre-relax, 0xc00 post-relax
+    la.pcrel $t0, target  # should become single pcaddi in relax pass 0
+
+# 0xc04 {pre,post}-relax
+.rept 523775
+.word 0
+.endr
+
+# 0x200400 {pre,post}-relax
+target:
+    break 0
+
+# 0x200404 {pre,post}-relax
+after:
+.rept 523774
+.word 0
+.endr
+
+# 0x3ffbfc {pre,post}-relax
+    la.pcrel $t0, target  # should become single pcaddi in relax pass 0
+
+# 255 nops + R_LARCH_ALIGN before relaxation
+# none of the aligning nops should remain after relax pass 1
+.p2align 10
+
+# 0x400000 pre-relax, 0x3ffc00 post-relax
+.rept 512
+.word 0
+.endr
+
+# 0x400800 pre-relax, 0x400400 post-relax
+    la.pcrel $t0, target  # furthest reach of relax pass 2
+    la.pcrel $t0, target  # too far; should stay as pcalau12i + addi.d