]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
core: Support heap-based trampolines
author Andrew Burgess <andrew.burgess@embecosm.com>
Sat, 5 Aug 2023 12:54:11 +0000 (14:54 +0200)
committer Iain Sandoe <iain@sandoe.co.uk>
Sun, 22 Oct 2023 13:01:06 +0000 (14:01 +0100)
Generate heap-based nested function trampolines

Add support for allocating nested function trampolines on an
executable heap rather than on the stack. This is motivated by targets
such as AArch64 Darwin, which globally prohibit executing code on the
stack.

The target-specific routines for allocating and writing trampolines are
to be provided in libgcc.

The gcc flag -ftrampoline-impl controls whether to generate code
that instantiates trampolines on the stack, or to emit calls to
__builtin_nested_func_ptr_created and
__builtin_nested_func_ptr_deleted. Note that this flag is completely
independent of libgcc: If libgcc is for any reason missing those
symbols, you will get a link failure.

This implementation imposes some implicit restrictions as compared to
stack trampolines. longjmp'ing back to a state before a trampoline was
created will cause us to skip over the corresponding
__builtin_nested_func_ptr_deleted, which will leak trampolines
starting from the beginning of the linked list of allocated
trampolines. There may be scope for instrumenting longjmp/setjmp to
trigger cleanups of trampolines.

Co-Authored-By: Maxim Blinov <maxim.blinov@embecosm.com>
Co-Authored-By: Iain Sandoe <iain@sandoe.co.uk>
Co-Authored-By: Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>
gcc/ChangeLog:

* builtins.def (BUILT_IN_NESTED_PTR_CREATED): Define.
(BUILT_IN_NESTED_PTR_DELETED): Ditto.
* common.opt (ftrampoline-impl): Add option to control
generation of trampoline instantiation (heap or stack).
* coretypes.h: Define enum trampoline_impl.
* tree-nested.cc (convert_tramp_reference_op): Don't bother calling
__builtin_adjust_trampoline for heap trampolines.
(finalize_nesting_tree_1): Emit calls to
__builtin_nested_...{created,deleted} if we're generating with
-ftrampoline-impl=heap.
* tree.cc (build_common_builtin_nodes): Build
__builtin_nested_...{created,deleted}.
* doc/invoke.texi (-ftrampoline-impl): Document.

gcc/builtins.def
gcc/common.opt
gcc/coretypes.h
gcc/doc/invoke.texi
gcc/tree-nested.cc
gcc/tree.cc

index eb6f4ec2034cdd9b3a24bf6289e9c2f3fc615939..e989cd814a5d621b05585ca9d094d03783f50f35 100644 (file)
@@ -1074,6 +1074,8 @@ DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline")
 DEF_BUILTIN_STUB (BUILT_IN_INIT_DESCRIPTOR, "__builtin_init_descriptor")
 DEF_BUILTIN_STUB (BUILT_IN_ADJUST_DESCRIPTOR, "__builtin_adjust_descriptor")
 DEF_BUILTIN_STUB (BUILT_IN_NONLOCAL_GOTO, "__builtin_nonlocal_goto")
+DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_CREATED, "__builtin_nested_func_ptr_created")
+DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_DELETED, "__builtin_nested_func_ptr_deleted")
 
 /* Implementing __builtin_setjmp.  */
 DEF_BUILTIN_STUB (BUILT_IN_SETJMP_SETUP, "__builtin_setjmp_setup")
index ce34075561f9fe0205faea879170f816d5d0c164..1cf3bdd3b518e1cbd5a4dce24423c5d22068bcb9 100644 (file)
@@ -2927,10 +2927,25 @@ Common Var(flag_tracer) Optimization
 Perform superblock formation via tail duplication.
 
 ftrampolines
-Common Var(flag_trampolines) Init(0)
+Common Var(flag_trampolines) Init(HEAP_TRAMPOLINES_INIT)
 For targets that normally need trampolines for nested functions, always
 generate them instead of using descriptors.
 
+ftrampoline-impl=
+Common Joined RejectNegative Enum(trampoline_impl) Var(flag_trampoline_impl) Init(HEAP_TRAMPOLINES_INIT ? TRAMPOLINE_IMPL_HEAP : TRAMPOLINE_IMPL_STACK)
+Whether trampolines are generated in executable memory rather than
+executable stack.
+
+Enum
+Name(trampoline_impl) Type(enum trampoline_impl) UnknownError(unknown trampoline implementation %qs)
+
+EnumValue
+Enum(trampoline_impl) String(stack) Value(TRAMPOLINE_IMPL_STACK)
+
+EnumValue
+Enum(trampoline_impl) String(heap) Value(TRAMPOLINE_IMPL_HEAP)
+
+
 ; Zero means that floating-point math operations cannot generate a
 ; (user-visible) trap.  This is the case, for example, in nonstop
 ; IEEE 754 arithmetic.
index f86dc169a40b069b68efdfe8885566917e914a7d..db7813bdd3d95045d7add94d048d253980063e65 100644 (file)
@@ -204,6 +204,12 @@ enum tls_model {
   TLS_MODEL_LOCAL_EXEC
 };
 
+/* Types of trampoline implementation; the -ftrampoline-impl= values.  */
+enum trampoline_impl {
+  TRAMPOLINE_IMPL_STACK, /* -ftrampoline-impl=stack.  */
+  TRAMPOLINE_IMPL_HEAP /* -ftrampoline-impl=heap.  */
+};
+
 /* Types of ABI for an offload compiler.  */
 enum offload_abi {
   OFFLOAD_ABI_UNSET,
index aebe9195ef0f2fcb2c74e1395fa9c06f9c2ff640..17aaa8cc058ae1e31352b98a0d4d267e1ce6dd36 100644 (file)
@@ -718,7 +718,8 @@ Objective-C and Objective-C++ Dialects}.
 -fverbose-asm  -fpack-struct[=@var{n}]
 -fleading-underscore  -ftls-model=@var{model}
 -fstack-reuse=@var{reuse_level}
--ftrampolines  -ftrapv  -fwrapv
+-ftrampolines -ftrampoline-impl=@r{[}stack@r{|}heap@r{]}
+-ftrapv  -fwrapv
 -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
 -fstrict-volatile-bitfields  -fsync-libcalls}
 
@@ -19050,6 +19051,20 @@ For languages other than Ada, the @code{-ftrampolines} and
 trampolines are always generated on platforms that need them
 for nested functions.
 
+@opindex ftrampoline-impl
+@item -ftrampoline-impl=@r{[}stack@r{|}heap@r{]}
+By default, trampolines are generated on the stack.  However, certain platforms
+(such as the Apple M1) do not permit an executable stack.  Compiling with
+@option{-ftrampoline-impl=heap} generates calls to
+@code{__builtin_nested_func_ptr_created} and
+@code{__builtin_nested_func_ptr_deleted} in order to allocate and
+deallocate trampoline space on the executable heap.  These functions are
+implemented in libgcc, and will only be provided on specific targets:
+x86_64 Darwin, x86_64 and aarch64 Linux.  @emph{PLEASE NOTE}: Heap
+trampolines are @emph{not} guaranteed to be correctly deallocated if you
+@code{setjmp}, instantiate nested functions, and then @code{longjmp} back
+to a state prior to having allocated those nested functions.
+
 @opindex fvisibility
 @item -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
 Set the default ELF image symbol visibility to the specified option---all
index 31c7b6001bd4f56df5c96c047d87f1fb6479e2f7..d2fe3fca8af9277369bd11aae678de7750acbe67 100644 (file)
@@ -611,6 +611,14 @@ get_trampoline_type (struct nesting_info *info)
   if (trampoline_type)
     return trampoline_type;
 
+  /* When trampolines are created off-stack then the only thing we need in the
+     local frame is a single pointer.  */
+  if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+    {
+      trampoline_type = build_pointer_type (void_type_node);
+      return trampoline_type;
+    }
+
   align = TRAMPOLINE_ALIGNMENT;
   size = TRAMPOLINE_SIZE;
 
@@ -2790,17 +2798,27 @@ convert_tramp_reference_op (tree *tp, int *walk_subtrees, void *data)
 
       /* Compute the address of the field holding the trampoline.  */
       x = get_frame_field (info, target_context, x, &wi->gsi);
-      x = build_addr (x);
-      x = gsi_gimplify_val (info, x, &wi->gsi);
 
-      /* Do machine-specific ugliness.  Normally this will involve
-        computing extra alignment, but it can really be anything.  */
-      if (descr)
-       builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+      /* APB: We don't need to do the adjustment calls when using off-stack
+        trampolines, any such adjustment will be done when the off-stack
+        trampoline is created.  */
+      if (!descr && flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+       x = gsi_gimplify_val (info, x, &wi->gsi);
       else
-       builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
-      call = gimple_build_call (builtin, 1, x);
-      x = init_tmp_var_with_call (info, &wi->gsi, call);
+       {
+         x = build_addr (x);
+
+         x = gsi_gimplify_val (info, x, &wi->gsi);
+
+         /* Do machine-specific ugliness.  Normally this will involve
+            computing extra alignment, but it can really be anything.  */
+         if (descr)
+           builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+         else
+           builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
+         call = gimple_build_call (builtin, 1, x);
+         x = init_tmp_var_with_call (info, &wi->gsi, call);
+       }
 
       /* Cast back to the proper function type.  */
       x = build1 (NOP_EXPR, TREE_TYPE (t), x);
@@ -3380,6 +3398,7 @@ build_init_call_stmt (struct nesting_info *info, tree decl, tree field,
 static void
 finalize_nesting_tree_1 (struct nesting_info *root)
 {
+  gimple_seq cleanup_list = NULL;
   gimple_seq stmt_list = NULL;
   gimple *stmt;
   tree context = root->context;
@@ -3511,9 +3530,48 @@ finalize_nesting_tree_1 (struct nesting_info *root)
          if (!field)
            continue;
 
-         x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
-         stmt = build_init_call_stmt (root, i->context, field, x);
-         gimple_seq_add_stmt (&stmt_list, stmt);
+         if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+           {
+             /* We pass a whole bunch of arguments to the builtin function that
+                creates the off-stack trampoline, these are
+                1. The nested function chain value (that must be passed to the
+                nested function so it can find the function arguments).
+                2. A pointer to the nested function implementation,
+                3. The address in the local stack frame where we should write
+                the address of the trampoline.
+
+                Historical note: this call was apparently first written to
+                pass everything that might be useful, including the stack
+                pointer as a separate first argument.  That added little, as
+                the frame address and the destination slot are both based off
+                the stack pointer, so only the three values above are passed.  */
+             tree arg1, arg2, arg3;
+
+             gcc_assert (DECL_STATIC_CHAIN (i->context));
+             arg1 = build_addr (root->frame_decl);
+             arg2 = build_addr (i->context);
+
+             x = build3 (COMPONENT_REF, TREE_TYPE (field),
+                         root->frame_decl, field, NULL_TREE);
+             arg3 = build_addr (x);
+
+             x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_CREATED);
+             stmt = gimple_build_call (x, 3, arg1, arg2, arg3);
+             gimple_seq_add_stmt (&stmt_list, stmt);
+
+             /* This call to delete the nested function trampoline is added to
+                the cleanup list, and called when we exit the current scope.  */
+             x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_DELETED);
+             stmt = gimple_build_call (x, 0);
+             gimple_seq_add_stmt (&cleanup_list, stmt);
+           }
+         else
+           {
+             /* Original code to initialise the on stack trampoline.  */
+             x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
+             stmt = build_init_call_stmt (root, i->context, field, x);
+             gimple_seq_add_stmt (&stmt_list, stmt);
+           }
        }
     }
 
@@ -3538,11 +3596,40 @@ finalize_nesting_tree_1 (struct nesting_info *root)
   /* If we created initialization statements, insert them.  */
   if (stmt_list)
     {
-      gbind *bind;
-      annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
-      bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
-      gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
-      gimple_bind_set_body (bind, stmt_list);
+      if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+       {
+         /* Handle off-stack trampolines.  */
+         gbind *bind;
+         annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
+         annotate_all_with_location (cleanup_list, DECL_SOURCE_LOCATION (context));
+         bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
+         gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
+
+         gimple_seq xxx_list = NULL;
+
+         if (cleanup_list != NULL)
+           {
+             /* Maybe we shouldn't be creating this try/finally if -fno-exceptions is
+                in use.  If this is the case, then maybe we should, instead, be
+                inserting the cleanup code onto every path out of this function?  Not
+                yet figured out how we would do this.  */
+             gtry *t = gimple_build_try (stmt_list, cleanup_list, GIMPLE_TRY_FINALLY);
+             gimple_seq_add_stmt (&xxx_list, t);
+           }
+         else
+           xxx_list = stmt_list;
+
+         gimple_bind_set_body (bind, xxx_list);
+       }
+      else
+       {
+         /* The traditional, on stack trampolines.  */
+         gbind *bind;
+         annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
+         bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
+         gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
+         gimple_bind_set_body (bind, stmt_list);
+       }
     }
 
   /* If a chain_decl was created, then it needs to be registered with
index f7bfd9e3451b3b19675bd0d856b4074e6e266cfe..f9fa7b78ffffe1064d42ac37547a304f9cb8da7c 100644 (file)
@@ -9922,6 +9922,23 @@ build_common_builtin_nodes (void)
                        "__builtin_nonlocal_goto",
                        ECF_NORETURN | ECF_NOTHROW);
 
+  tree ptr_ptr_type_node = build_pointer_type (ptr_type_node);
+
+  ftype = build_function_type_list (void_type_node,
+                                   ptr_type_node, // void *chain
+                                   ptr_type_node, // void *func
+                                   ptr_ptr_type_node, // void **dst
+                                   NULL_TREE);
+  local_define_builtin ("__builtin_nested_func_ptr_created", ftype,
+                       BUILT_IN_NESTED_PTR_CREATED,
+                       "__builtin_nested_func_ptr_created", ECF_NOTHROW);
+
+  ftype = build_function_type_list (void_type_node,
+                                   NULL_TREE);
+  local_define_builtin ("__builtin_nested_func_ptr_deleted", ftype,
+                       BUILT_IN_NESTED_PTR_DELETED,
+                       "__builtin_nested_func_ptr_deleted", ECF_NOTHROW);
+
   ftype = build_function_type_list (void_type_node,
                                    ptr_type_node, ptr_type_node, NULL_TREE);
   local_define_builtin ("__builtin_setjmp_setup", ftype,