From: Christophe Lyon Date: Tue, 17 Dec 2019 15:43:07 +0000 (+0000) Subject: ARM: Add support for -mpure-code in thumb-1 (v6m) X-Git-Tag: releases/gcc-9.3.0~84 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9c5db942ca332494ef3d79d4a7d494d83cad8304;p=thirdparty%2Fgcc.git ARM: Add support for -mpure-code in thumb-1 (v6m) This patch extends support for -mpure-code to all thumb-1 processors, by removing the need for MOVT. Symbol addresses are built using upper8_15, upper0_7, lower8_15 and lower0_7 relocations, and constants are built using sequences of movs/adds and lsls instructions. The extension of the *thumb1_movhf pattern uses always the same size (6) although it can emit a shorter sequence when possible. This is similar to what *arm32_movhf already does. CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid generating invalid assembly code with differences from symbols from two different sections (the difference cannot be computed by the assembler). Tests pr45701-[12].c needed a small adjustment to avoid matching upper8_15 when looking for the r8 register. Test no-literal-pool.c is augmented with __fp16, so it now uses -mfp16-format=ieee. Test thumb1-Os-mult.c generates an inline code sequence with -mpure-code and computes the multiplication by using a sequence of add/shift rather than using the multiply instruction, so we skip it in presence of -mpure-code. With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because code like: static char *p = "Hello World"; char * testchar () { return p + 4; } generates 2 indirections (I removed non-essential directives/code) .section .rodata .LC0: .ascii "Hello World\000" .data p: .word .LC0 .section .rodata .LC2: .word p .section .text,"0x20000006",%progbits testchar: push {r7, lr} add r7, sp, #0 movs r3, #:upper8_15:#.LC2 lsls r3, #8 adds r3, #:upper0_7:#.LC2 lsls r3, #8 adds r3, #:lower8_15:#.LC2 lsls r3, #8 adds r3, #:lower0_7:#.LC2 ldr r3, [r3] ldr r3, [r3] adds r3, r3, #4 movs r0, r3 mov sp, r7 @ sp needed pop {r7, pc} By contrast, when using -mcpu=cortex-m4, the code looks like: .section .rodata .LC0: .ascii "Hello World\000" .data p: .word .LC0 testchar: push {r7} add r7, sp, #0 movw r3, #:lower16:p movt r3, #:upper16:p ldr r3, [r3] adds r3, r3, #4 mov r0, r3 mov sp, r7 pop {r7} bx lr I haven't found yet how to make code for cortex-m0 apply upper/lower relocations to "p" instead of .LC2. The current code looks functional, but could be improved. 2020-02-25 Christophe Lyon Backport from mainline 2019-10-18 Christophe Lyon gcc/ * config/arm/arm-protos.h (thumb1_gen_const_int): Add new prototype. * config/arm/arm.c (arm_option_check_internal): Remove restriction on MOVT for -mpure-code. (thumb1_gen_const_int): New function. (thumb1_legitimate_address_p): Support -mpure-code. (thumb1_rtx_costs): Likewise. (thumb1_size_rtx_costs): Likewise. (arm_thumb1_mi_thunk): Likewise. * config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Likewise. * config/arm/thumb1.md (thumb1_movsi_symbol_ref): New. (*thumb1_movhf): Support -mpure-code. gcc/testsuite/ * gcc.target/arm/pr45701-1.c: Adjust for -mpure-code. * gcc.target/arm/pr45701-2.c: Likewise. * gcc.target/arm/pure-code/no-literal-pool.c: Add tests for __fp16. * gcc.target/arm/pure-code/pure-code.exp: Remove thumb2 and movt conditions. * gcc.target/arm/thumb1-Os-mult.c: Skip if -mpure-code is used. --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ce5216020fab..4de9e505140e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2020-02-25 Christophe Lyon + + Backport from mainline + 2019-12-17 Christophe Lyon + + * config/arm/arm-protos.h (thumb1_gen_const_int): Add new prototype. + * config/arm/arm.c (arm_option_check_internal): Remove restriction + on MOVT for -mpure-code. + (thumb1_gen_const_int): New function. + (thumb1_legitimate_address_p): Support -mpure-code. + (thumb1_rtx_costs): Likewise. + (thumb1_size_rtx_costs): Likewise. + (arm_thumb1_mi_thunk): Likewise. + * config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Likewise. + * config/arm/thumb1.md (thumb1_movsi_symbol_ref): New. + (*thumb1_movhf): Support -mpure-code. + * doc/invoke.texi (-mpure-code): Remove restriction on MOVT. + 2020-02-25 Jakub Jelinek PR rtl-optimization/93908 diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 485bc68a618d..98beb6109b95 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -66,6 +66,7 @@ extern bool arm_small_register_classes_for_mode_p (machine_mode); extern int const_ok_for_arm (HOST_WIDE_INT); extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code); +extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT); extern int arm_split_constant (RTX_CODE, machine_mode, rtx, HOST_WIDE_INT, rtx, rtx, int); extern int legitimate_pic_operand_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 91bb65130b83..cdfc0f9e72fc 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -2904,13 +2904,18 @@ arm_option_check_internal (struct gcc_options *opts) { const char *flag = (target_pure_code ? "-mpure-code" : "-mslow-flash-data"); + bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON; - /* We only support -mpure-code and -mslow-flash-data on M-profile targets - with MOVT. */ - if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON) + /* We only support -mslow-flash-data on M-profile targets with + MOVT. */ + if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes)) error ("%s only supports non-pic code on M-profile targets with the " "MOVT instruction", flag); + /* We only support -mpure-code on M-profile targets. */ + if (target_pure_code && common_unsupported_modes) + error ("%s only supports non-pic code on M-profile targets", flag); + /* Cannot load addresses: -mslow-flash-data forbids literal pool and -mword-relocations forbids relocation of MOVT/MOVW. */ if (target_word_relocations) @@ -4316,6 +4321,38 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) } } +/* Emit a sequence of movs/adds/shift to produce a 32-bit constant. + Avoid generating useless code when one of the bytes is zero. */ +void +thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1) +{ + bool mov_done_p = false; + int i; + + /* Emit upper 3 bytes if needed. */ + for (i = 0; i < 3; i++) + { + int byte = (op1 >> (8 * (3 - i))) & 0xff; + + if (byte) + { + emit_set_insn (op0, mov_done_p + ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte)) + : GEN_INT (byte)); + mov_done_p = true; + } + + if (mov_done_p) + emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8))); + } + + /* Emit lower byte if needed. */ + if (!mov_done_p) + emit_set_insn (op0, GEN_INT (op1 & 0xff)); + else if (op1 & 0xff) + emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff))); +} + /* Emit a sequence of insns to handle a large constant. CODE is the code of the operation required, it can be any of SET, PLUS, IOR, AND, XOR, MINUS; @@ -8325,7 +8362,8 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p) /* This is PC relative data before arm_reorg runs. */ else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) && GET_CODE (x) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) + && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic + && !arm_disable_literal_pool) return 1; /* This is PC relative data after arm_reorg runs. */ @@ -8393,6 +8431,7 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p) && GET_MODE_SIZE (mode) == 4 && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x) + && !arm_disable_literal_pool && ! (flag_pic && symbol_mentioned_p (get_pool_constant (x)) && ! pcrel_constant_p (get_pool_constant (x)))) @@ -9028,7 +9067,9 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) return 0; if (thumb_shiftable_const (INTVAL (x))) return COSTS_N_INSNS (2); - return COSTS_N_INSNS (3); + return arm_disable_literal_pool + ? COSTS_N_INSNS (8) + : COSTS_N_INSNS (3); } else if ((outer == PLUS || outer == COMPARE) && INTVAL (x) < 256 && INTVAL (x) > -256) @@ -9185,7 +9226,9 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ if (thumb_shiftable_const (INTVAL (x))) return COSTS_N_INSNS (2); - return COSTS_N_INSNS (3); + return arm_disable_literal_pool + ? COSTS_N_INSNS (8) + : COSTS_N_INSNS (3); } else if ((outer == PLUS || outer == COMPARE) && INTVAL (x) < 256 && INTVAL (x) > -256) @@ -26832,14 +26875,41 @@ arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, /* push r3 so we can use it as a temporary. */ /* TODO: Omit this save if r3 is not used. */ fputs ("\tpush {r3}\n", file); - fputs ("\tldr\tr3, ", file); + + /* With -mpure-code, we cannot load the address from the + constant pool: we build it explicitly. */ + if (target_pure_code) + { + fputs ("\tmovs\tr3, #:upper8_15:#", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fputc ('\n', file); + fputs ("\tlsls r3, #8\n", file); + fputs ("\tadds\tr3, #:upper0_7:#", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fputc ('\n', file); + fputs ("\tlsls r3, #8\n", file); + fputs ("\tadds\tr3, #:lower8_15:#", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fputc ('\n', file); + fputs ("\tlsls r3, #8\n", file); + fputs ("\tadds\tr3, #:lower0_7:#", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fputc ('\n', file); + } + else + fputs ("\tldr\tr3, ", file); } else { fputs ("\tldr\tr12, ", file); } - assemble_name (file, label); - fputc ('\n', file); + + if (!target_pure_code) + { + assemble_name (file, label); + fputc ('\n', file); + } + if (flag_pic) { /* If we are generating PIC, the ldr instruction below loads diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4866e1e4b7dc..9ee6a4eb524c 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1854,9 +1854,11 @@ enum arm_auto_incmodes for the index in the tablejump instruction. */ #define CASE_VECTOR_MODE Pmode -#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \ - || (TARGET_THUMB1 \ - && (optimize_size || flag_pic))) +#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2 \ + || (TARGET_THUMB1 \ + && (optimize_size || flag_pic))) \ + && (!target_pure_code)) + #define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ (TARGET_THUMB1 \ diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md index cefd6cfcdcda..15bf3f08fb46 100644 --- a/gcc/config/arm/thumb1.md +++ b/gcc/config/arm/thumb1.md @@ -43,6 +43,41 @@ +(define_insn "thumb1_movsi_symbol_ref" + [(set (match_operand:SI 0 "register_operand" "=l") + (match_operand:SI 1 "general_operand" "")) + ] + "TARGET_THUMB1 + && arm_disable_literal_pool + && GET_CODE (operands[1]) == SYMBOL_REF" + "* + output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands); + output_asm_insn (\"lsls\\t%0, #8\", operands); + output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands); + output_asm_insn (\"lsls\\t%0, #8\", operands); + output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands); + output_asm_insn (\"lsls\\t%0, #8\", operands); + output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands); + return \"\"; + " + [(set_attr "length" "14") + (set_attr "conds" "clob")] +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "TARGET_THUMB1 + && arm_disable_literal_pool + && GET_CODE (operands[1]) == CONST_INT + && !satisfies_constraint_I (operands[1])" + [(clobber (const_int 0))] + " + thumb1_gen_const_int (operands[0], INTVAL (operands[1])); + DONE; + " +) + (define_insn "*thumb1_adddi3" [(set (match_operand:DI 0 "register_operand" "=l") (plus:DI (match_operand:DI 1 "register_operand" "%0") @@ -829,8 +864,8 @@ (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) (define_insn "*thumb1_movhf" - [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") - (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l, m,F,l,*h,*r"))] "TARGET_THUMB1 && ( s_register_operand (operands[0], HFmode) || s_register_operand (operands[1], HFmode))" @@ -855,14 +890,34 @@ } return \"ldrh\\t%0, %1\"; } - case 2: return \"strh\\t%1, %0\"; + case 2: + { + int bits; + int high; + rtx ops[3]; + + bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]), + HFmode); + ops[0] = operands[0]; + high = (bits >> 8) & 0xff; + ops[1] = GEN_INT (high); + ops[2] = GEN_INT (bits & 0xff); + if (high != 0) + output_asm_insn (\"movs\\t%0, %1\;lsls\\t%0, #8\;adds\\t%0, %2\", ops); + else + output_asm_insn (\"movs\\t%0, %2\", ops); + + return \"\"; + } + case 3: return \"strh\\t%1, %0\"; default: return \"mov\\t%0, %1\"; } " - [(set_attr "length" "2") - (set_attr "type" "mov_reg,load_4,store_4,mov_reg,mov_reg") - (set_attr "pool_range" "*,1018,*,*,*") - (set_attr "conds" "clob,nocond,nocond,nocond,nocond")]) + [(set_attr "length" "2,2,6,2,2,2") + (set_attr "type" "mov_reg,load_4,mov_reg,store_4,mov_reg,mov_reg") + (set_attr "pool_range" "*,1018,*,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond")]) + ;;; ??? This should have alternatives for constants. (define_insn "*thumb1_movsf_insn" [(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7d8c0802c08c..0ab6c9c64494 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17920,8 +17920,7 @@ provided for use in debugging the compiler. Do not allow constant data to be placed in code sections. Additionally, when compiling for ELF object format give all text sections the ELF processor-specific section attribute @code{SHF_ARM_PURECODE}. This option -is only available when generating non-pic code for M-profile targets with the -MOVT instruction. +is only available when generating non-pic code for M-profile targets. @item -mcmse @opindex mcmse diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d3cdd122e143..8536c83b4802 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2020-02-25 Christophe Lyon + + Backport from mainline + 2019-12-17 Christophe Lyon + + * gcc.target/arm/pr45701-1.c: Adjust for -mpure-code. + * gcc.target/arm/pr45701-2.c: Likewise. + * gcc.target/arm/pure-code/no-literal-pool.c: Add tests for + __fp16. + * gcc.target/arm/pure-code/pure-code.exp: Remove thumb2 and movt + conditions. + * gcc.target/arm/thumb1-Os-mult.c: Skip if -mpure-code is used. + 2020-02-25 Jakub Jelinek PR rtl-optimization/93908 diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c b/gcc/testsuite/gcc.target/arm/pr45701-1.c index 01db15abfd03..2060ae464b36 100644 --- a/gcc/testsuite/gcc.target/arm/pr45701-1.c +++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c @@ -2,7 +2,7 @@ /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ /* { dg-options "-mthumb -Os" } */ /* { dg-final { scan-assembler "push\t\{r3" } } */ -/* { dg-final { scan-assembler-not "\[^\-\]r8" } } */ +/* { dg-final { scan-assembler-not "\[^\-e\]r8" } } */ extern int hist_verify; extern int a1; diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c b/gcc/testsuite/gcc.target/arm/pr45701-2.c index ce66d7509d17..23ca21319a4e 100644 --- a/gcc/testsuite/gcc.target/arm/pr45701-2.c +++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c @@ -2,7 +2,7 @@ /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */ /* { dg-options "-mthumb -Os" } */ /* { dg-final { scan-assembler "push\t\{r3" } } */ -/* { dg-final { scan-assembler-not "\[^\-\]r8" } } */ +/* { dg-final { scan-assembler-not "\[^\-e\]r8" } } */ extern int hist_verify; extern int a1; diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c index 4b893fd32f72..3de162091d9d 100644 --- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c +++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c @@ -1,12 +1,24 @@ /* { dg-do compile } */ -/* { dg-options "-mpure-code" } */ +/* { dg-options "-mpure-code -mfp16-format=ieee" } */ /* { dg-skip-if "" { *-*-* } { "-g" "-fpic" "-fPIC" } { "" } } */ +__fp16 hf; float sf; double df; long long l; static char *p = "Hello World"; +__fp16 +testsfp16 (__fp16 *p) +{ + hf = 1.3; + *p += hf; + if (*p > 1.1234f) + return 2.1234f; + else + return 3.1234f; +} + float testsf (float *p) { diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp index bf7e4add03fc..b05cfd6a28cd 100644 --- a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp +++ b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp @@ -25,11 +25,8 @@ if ![info exists DEFAULT_CFLAGS] then { set DEFAULT_CFLAGS " -ansi -pedantic-errors" } -# The -mpure-code option is only available for M-profile targets that support -# the MOVT instruction. -if {([check_effective_target_arm_thumb2_ok] - || [check_effective_target_arm_thumb1_movt_ok]) - && [check_effective_target_arm_cortex_m]} then { +# The -mpure-code option is only available for M-profile targets. +if {[check_effective_target_arm_cortex_m]} then { # Initialize `dg'. dg-init @@ -56,4 +53,4 @@ set LTO_TORTURE_OPTIONS ${saved-lto_torture_options} # All done. dg-finish -} +#} diff --git a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c index b989c420830a..92772d414a70 100644 --- a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c +++ b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_thumb1_ok } */ /* { dg-options "-Os" } */ +/* { dg-skip-if "-mpure-code generates an inline multiplication code sequence" { *-*-* } { "-mpure-code" } } */ /* { dg-skip-if "" { ! { arm_thumb1 } } } */ int