From: Richard Ball Date: Mon, 30 Oct 2023 15:31:26 +0000 (+0000) Subject: arm: move the switch tables for Arm to the RO data section. X-Git-Tag: basepoints/gcc-15~5128 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fb1941d08fc3711b058c1e148e9ce7ed9b7dfbba;p=thirdparty%2Fgcc.git arm: move the switch tables for Arm to the RO data section. Follow up patch to arm: Use deltas for Arm switch tables This patch moves the switch tables for Arm from the .text section into the .rodata section. gcc/ChangeLog: * config/arm/aout.h: Change to use the Lrtx label. * config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets from (!target_pure_code) condition. (ADDR_VEC_ALIGN): Add align for tables in rodata section. * config/arm/arm.cc (arm_output_casesi): Alter the function to include .Lrtx label and remove adr instructions. * config/arm/arm.md (arm_casesi_internal): Use force_reg to generate ldr instructions that would otherwise be out of range, and change rtl to accommodate force reg. Additionally remove unnecessary register temp. (casesi): Remove pure code check for Arm. * config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm targets from JUMP_TABLES_IN_TEXT_SECTION definition. gcc/testsuite/ChangeLog: * gcc.target/arm/arm-switchstatement.c: Alter the tests to change adr instruction to ldr. --- diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h index 6a4c8da5f6d5..49896bb96208 100644 --- a/gcc/config/arm/aout.h +++ b/gcc/config/arm/aout.h @@ -187,16 +187,16 @@ switch (GET_MODE (body)) \ { \ case E_QImode: \ - asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n", \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LLrtx%d-4)/4\n", \ VALUE, REL); \ break; \ case E_HImode: \ - asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n", \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LLrtx%d-4)/4\n", \ VALUE, REL); \ break; \ case E_SImode: \ if (flag_pic) \ - asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n", \ + asm_fprintf (STREAM, "\t.word\t%LL%d-%LLrtx%d-4\n", \ VALUE, REL); \ else \ asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \ diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 4e5e6997ed55..620ef7bfb2f3 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -30469,44 +30469,55 @@ arm_output_iwmmxt_tinsr (rtx *operands) const char * arm_output_casesi (rtx *operands) { + char label[100]; rtx diff_vec = PATTERN (NEXT_INSN (as_a (operands[2]))); - gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); - output_asm_insn ("cmp\t%0, %1", operands); output_asm_insn ("bhi\t%l3", operands); + ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2])); switch (GET_MODE (diff_vec)) { case E_QImode: - output_asm_insn ("adr\t%4, %l2", operands); if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) - output_asm_insn ("ldrb\t%4, [%4, %0]", operands); - else - output_asm_insn ("ldrsb\t%4, [%4, %0]", operands); - return "add\t%|pc, %|pc, %4, lsl #2"; - + output_asm_insn ("ldrb\t%4, [%5, %0]", operands); + else + output_asm_insn ("ldrsb\t%4, [%5, %0]", operands); + output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands); + break; case E_HImode: - output_asm_insn ("adr\t%4, %l2", operands); - output_asm_insn ("add\t%4, %4, %0", operands); - if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) - output_asm_insn ("ldrh\t%4, [%4, %0]", operands); - else - output_asm_insn ("ldrsh\t%4, [%4, %0]", operands); - return "add\t%|pc, %|pc, %4, lsl #2"; - + if (REGNO (operands[4]) != REGNO (operands[5])) + { + output_asm_insn ("add\t%4, %0, %0", operands); + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + output_asm_insn ("ldrh\t%4, [%5, %4]", operands); + else + output_asm_insn ("ldrsh\t%4, [%5, %4]", operands); + } + else + { + output_asm_insn ("add\t%4, %5, %0", operands); + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + output_asm_insn ("ldrh\t%4, [%4, %0]", operands); + else + output_asm_insn ("ldrsh\t%4, [%4, %0]", operands); + } + output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands); + break; case E_SImode: if (flag_pic) { - output_asm_insn ("adr\t%4, %l2", operands); - output_asm_insn ("ldr\t%4, [%4, %0, lsl #2]", operands); - return "add\t%|pc, %|pc, %4"; + output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands); + output_asm_insn ("add\t%|pc, %|pc, %4", operands); } - output_asm_insn ("adr\t%4, %l2", operands); - return "ldr\t%|pc, [%4, %0, lsl #2]"; - + else + output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands); + break; default: gcc_unreachable (); } + assemble_label (asm_out_file, label); + output_asm_insn ("nop", operands); + return ""; } /* Output a Thumb-1 casesi dispatch sequence. */ diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 3063e3489094..a9c2752c0ea5 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2092,10 +2092,11 @@ enum arm_auto_incmodes for the index in the tablejump instruction. */ #define CASE_VECTOR_MODE Pmode -#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2 \ - || (TARGET_THUMB1 \ - && (optimize_size || flag_pic))) \ - && (!target_pure_code)) +#define CASE_VECTOR_PC_RELATIVE \ + (TARGET_ARM \ + || (!target_pure_code \ + && (TARGET_THUMB2 \ + || (TARGET_THUMB1 && (optimize_size || flag_pic))))) #define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ @@ -2301,8 +2302,14 @@ extern int making_const_table; asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \ } while (0) -#define ADDR_VEC_ALIGN(JUMPTABLE) \ - ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0) +/* If the switch table is in the code segment, additional alignment is + needed for Thumb SImode tables. Otherwise, tables in RO data have + natural alignment. */ +#define ADDR_VEC_ALIGN(TABLE) \ + (JUMP_TABLES_IN_TEXT_SECTION \ + ? ((TARGET_THUMB && GET_MODE (PATTERN (TABLE)) == SImode) ? 2 : 0) \ + : (exact_log2 (GET_MODE_ALIGNMENT (GET_MODE (PATTERN (TABLE))) \ + / BITS_PER_UNIT))) /* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the default alignment from elfos.h. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 0b2eb4bce92b..07eaf06cdeac 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -9514,7 +9514,9 @@ (match_operand:SI 2 "const_int_operand") ; total range (match_operand:SI 3 "" "") ; table label (match_operand:SI 4 "" "")] ; Out of range label - "(TARGET_32BIT || optimize_size || flag_pic) && !target_pure_code" + "(TARGET_ARM + || (!target_pure_code + && (TARGET_THUMB2 || optimize_size || flag_pic)))" " { enum insn_code code; @@ -9557,14 +9559,14 @@ (label_ref:SI (match_operand 3 "")))) (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:SI 5)) - (clobber (match_scratch:SI 6)) (use (label_ref:SI (match_operand 2 "")))])] "TARGET_ARM" { - operands[4] = gen_rtx_MULT (SImode, operands[0], GEN_INT (4)); - operands[4] = gen_rtx_PLUS (SImode, operands[4], - gen_rtx_LABEL_REF (SImode, operands[2])); - operands[4] = gen_rtx_MEM (SImode, operands[4]); + rtx vec_table_ref = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[2])); + rtx tmp = gen_rtx_MULT (SImode, operands[0], GEN_INT (4)); + tmp = gen_rtx_PLUS (SImode, tmp, + vec_table_ref); + operands[4] = gen_rtx_MEM (SImode, tmp); MEM_READONLY_P (operands[4]) = 1; MEM_NOTRAP_P (operands[4]) = 1; }) @@ -9575,12 +9577,11 @@ (leu (match_operand:SI 0 "s_register_operand" "r") (match_operand:SI 1 "arm_rhs_operand" "rI")) (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) - (label_ref:SI (match_operand 2 "" "")))) + (match_operand:SI 5 "s_register_operand" "r"))) (label_ref:SI (match_operand 3 "" "")))) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:SI 4 "=&r")) - (clobber (match_scratch:SI 5 "=r")) - (use (label_ref:SI (match_dup 2)))])] + (clobber (match_scratch:SI 4 "=r")) + (use (label_ref:SI (match_operand 2 "")))])] "TARGET_ARM" { return arm_output_casesi (operands); diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h index 5766cb4a9888..f76682898960 100644 --- a/gcc/config/arm/elf.h +++ b/gcc/config/arm/elf.h @@ -91,11 +91,21 @@ /* Define this macro if jump tables (for `tablejump' insns) should be output in the text section, along with the assembler instructions. Otherwise, the readonly data section is used. */ -/* We put ARM and Thumb-2 jump tables in the text section, because it makes - the code more efficient, but for Thumb-1 it's better to put them out of - band unless we are generating compressed tables. */ +/* The choice of placement for jump tables is nuanced. For cores with + Harvard caches (pretty much all cases these days), there is a + benefit of maintaing a separation between I- and D-cache candidates + and that favors having jump tables in the RO data section. This + makes the dispatch sequence slightly longer as we need to load the + address of the jump table first, but we often save elsewhere as the + content of the code section becomes better packed and we need + fewer long-range branch operations. We also require the dispatch + table to be in the RO section when compiling for pure code. We + currently place jump tables in the RO-data section for Arm or + whenever pure code is required; we also do it for Thumb-1 when + using an ADDR_VEC. The remaining cases put the jump table in the + text section, but we should revisit this choice. */ #define JUMP_TABLES_IN_TEXT_SECTION \ - ((TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) \ + ((TARGET_THUMB2 || (TARGET_THUMB && (optimize_size || flag_pic))) \ && !target_pure_code) #ifndef LINK_SPEC diff --git a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c index a7aa9d45c763..803731c479c6 100644 --- a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c +++ b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c @@ -53,9 +53,10 @@ inline void SIFunction (const char* flag) /* **QImode_test: ** ... -** adr (r[0-9]+), .L[0-9]+ -** ldrb \1, \[\1, r[0-9]+\] -** add pc, pc, \1, lsl #2 +** ldr (r[0-9]+), .L[0-9]+ +** ... +** ldrb (r[0-9]+), \[\1, r[0-9]+\] +** add pc, pc, \2, lsl #2 ** ... */ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x) @@ -77,10 +78,11 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enu /* **HImode_test: ** ... -** adr (r[0-9]+), .L[0-9]+ -** add \1, \1, (r[0-9]+) -** ldrh \1, \[\1, \2\] -** add pc, pc, \1, lsl #2 +** ldr (r[0-9]+), .L[0-9]+ +** ... +** add r[0-9]+, r[0-9]+, r[0-9]+ +** ldrh (r[0-9]+), \[r[0-9]+, r[0-9]+] +** add pc, pc, \2, lsl #2 ** ... */ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x) @@ -102,7 +104,8 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enu /* **SImode_test: ** ... -** adr (r[0-9]+), .L[0-9]+ +** ldr (r[0-9]+), .L[0-9]+ +** ... ** ldr pc, \[\1, r[0-9]+, lsl #2\] ** ... */ @@ -125,10 +128,11 @@ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enu /* **backwards_branch_test: ** ... -** adr (r[0-9]+), .L[0-9]+ -** add \1, \1, (r[0-9]+) -** ldrsh \1, \[\1, \2\] -** add pc, pc, \1, lsl #2 +** ldr (r[0-9]+), .L[0-9]+ +** ... +** add r[0-9]+, r[0-9]+, r[0-9]+ +** ldrsh (r[0-9]+), \[r[0-9]+, r[0-9]+] +** add pc, pc, \2, lsl #2 ** ... */ __attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag)