extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
extern const char *nvptx_output_mov_insn (rtx, rtx);
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
+extern const char *nvptx_output_fake_ptx_alloca (void);
extern const char *nvptx_output_return (void);
extern const char *nvptx_output_set_softstack (unsigned);
extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
}
return "";
}
+
+/* Output a fake PTX 'alloca'.
+
+   Returns the assembler output template for a call to the external function
+   '__GCC_nvptx__PTX_alloca_not_supported'; '%0' and '%1' in the template are
+   operands of the insn being output (presumably the returned pointer and the
+   requested size, respectively -- confirm against the insn pattern). On the
+   first call, this also creates a declaration for that function and passes
+   it to 'nvptx_record_needed_fndecl'. */
+
+const char *
+nvptx_output_fake_ptx_alloca (void)
+{
+#define FAKE_PTX_ALLOCA_NAME "__GCC_nvptx__PTX_alloca_not_supported"
+ /* Being 'static', 'decl' persists across calls, so the declaration below is
+ built and recorded only once. */
+ static tree decl;
+ if (!decl)
+ {
+ /* The fake function gets the same type as the 'alloca' built-in. */
+ tree alloca_type = TREE_TYPE (builtin_decl_explicit (BUILT_IN_ALLOCA));
+ decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
+ get_identifier (FAKE_PTX_ALLOCA_NAME), alloca_type);
+ /* Mark the declaration as external and public. */
+ DECL_EXTERNAL (decl) = 1;
+ TREE_PUBLIC (decl) = 1;
+ nvptx_record_needed_fndecl (decl);
+ }
+ /* Adjacent string literals are concatenated, splicing the symbol name into
+ the template. */
+ return "\tcall\t(%0), " FAKE_PTX_ALLOCA_NAME ", (%1);";
+#undef FAKE_PTX_ALLOCA_NAME
+}
+
/* Output a return instruction. Also copy the return value to its outgoing
location. */
output_asm_insn ("}", NULL);
return "";
}
+ else if (nvptx_fake_ptx_alloca)
+ return nvptx_output_fake_ptx_alloca ();
else
{
sorry_at (INSN_LOCATION (insn),
gcc_checking_assert (REG_P (operands[0]));
emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1]));
}
+ /* We don't bother to special-case '-mfake-ptx-alloca' here. */
else
{
/* The concept of a '%stack' pointer doesn't apply like this.
operands[1] = force_reg (Pmode, operands[1]);
emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1]));
}
+ /* We don't bother to special-case '-mfake-ptx-alloca' here. */
else if (!TARGET_SOFT_STACK)
; /* See 'save_stack_block'. */
else if (TARGET_SOFT_STACK)
mexperimental
Target Var(nvptx_experimental) Init(0) Undocumented
+
+mfake-ptx-alloca
+Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented
+; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only
+; for configurations where PTX 'alloca' is not available. Rather than a
+; compile-time 'sorry, unimplemented: dynamic stack allocation not supported'
+; in the presence of dynamic stack allocation, compilation and assembly then
+; succeed. However, attempting to link in such '*.o' files then fails due
+; to the unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'.
+;
+; This is meant to be used in scenarios where large volumes of code are
+; compiled, a small fraction of which runs into dynamic stack allocation, but
+; these parts are not important for specific use cases, and we'd thus like the
+; build to succeed, and error out just upon actual, very rare use of the
+; offending '*.o' files.
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-additional-options -march=sm_30 } */
+/* { dg-additional-options -mfake-ptx-alloca } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+/* Calls the 'alloca' built-in with a constant size and passes the result to
+   'sink'. With the options used by this test, the 'sorry' diagnostic for
+   dynamic stack allocation must not appear on the 'alloca' line. */
+void f(void)
+{
+ sink(__builtin_alloca(123));
+ /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
+}
+/*
+** f:
+** \.visible \.func f
+** {
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** mov\.u64 \11, 16;
+** add\.u64 \2, \11, -1;
+** add\.u64 \3, \2, 123;
+** div\.u64 \4, \3, 16;
+** mul\.lo\.u64 \5, \4, 16;
+** call \(\6\), __GCC_nvptx__PTX_alloca_not_supported, \(\5\);
+** add\.u64 \7, \6, 15;
+** shr\.u64 \8, \7, 4;
+** shl\.b64 \9, \8, 4;
+** mov\.u64 \1, \9;
+** mov\.u64 \10, \1;
+** {
+** \.param\.u64 %out_arg1;
+** st\.param\.u64 \[%out_arg1\], \10;
+** call sink, \(%out_arg1\);
+** }
+** ret;
+*/
+
+/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
--- /dev/null
+/* { dg-do link } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-additional-options -march=sm_30 } */
+/* { dg-additional-options -mfake-ptx-alloca } */
+/* { dg-additional-options -save-temps } */
+
+/* Performs two 'alloca' calls and compares the returned pointers. Each of
+   the two calls is expected to appear in the assembly output as a call to
+   the fake 'alloca' function; linking is expected to fail on that symbol. */
+int
+main(void)
+{
+ return !(__builtin_alloca(100) != __builtin_alloca(10));
+}
+/* { dg-final { scan-assembler-times {(?n)\tcall\t\(%r[0-9]+\), __GCC_nvptx__PTX_alloca_not_supported, \(%r[0-9]+\);$} 2 } } */
+
+/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
+
+/* { dg-message __GCC_nvptx__PTX_alloca_not_supported {unresolved symbol} { target *-*-* } 0 } */
+
+/* { dg-final output-exists-not } */
/* { dg-do assemble } */
/* { dg-options {-O3 -mno-soft-stack} } */
/* { dg-add-options nvptx_alloca_ptx } */
+/* Verify the fake one isn't used if the real PTX 'alloca' is available.
+ { dg-additional-options -mfake-ptx-alloca } */
/* { dg-additional-options -save-temps } */
/* { dg-final { check-function-bodies {** } {} } } */
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-options {-O3 -mno-soft-stack} } */
+/* { dg-additional-options {-march=sm_30 -mfake-ptx-alloca} } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+/* Combines '__builtin_stack_save' / '__builtin_stack_restore' with two
+   constant-size 'alloca' calls. Neither 'alloca' line may produce the
+   'sorry' diagnostic for dynamic stack allocation. */
+void f(void)
+{
+ void *p;
+ p = __builtin_stack_save();
+ sink(__builtin_alloca(25));
+ /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
+ __builtin_stack_restore(p);
+ sink(__builtin_alloca(13));
+ /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
+}
+/*
+** f:
+** \.visible \.func f
+** {
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** call \(\1\), __GCC_nvptx__PTX_alloca_not_supported, \(32\);
+** add\.u64 \2, \1, 15;
+** and\.b64 \3, \2, -16;
+** {
+** \.param\.u64 %out_arg1;
+** st\.param\.u64 \[%out_arg1\], \3;
+** call sink, \(%out_arg1\);
+** }
+** call \(\4\), __GCC_nvptx__PTX_alloca_not_supported, \(16\);
+** add\.u64 \5, \4, 15;
+** and\.b64 \6, \5, -16;
+** {
+** \.param\.u64 %out_arg1;
+** st\.param\.u64 \[%out_arg1\], \6;
+** call sink, \(%out_arg1\);
+** }
+** ret;
+*/
+
+/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-additional-options -march=sm_30 } */
+/* { dg-additional-options -mfake-ptx-alloca } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void sink(void *);
+
+/* Declares a variable-length array whose size is the run-time value 's' and
+   passes it to 'sink'. The array declaration must not produce the 'sorry'
+   diagnostic for dynamic stack allocation. */
+void f(int s)
+{
+ char a[s];
+ /* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
+ sink(a);
+}
+/*
+** f:
+** ...
+** cvt\.s64\.s32 (%r[0-9]+), (%r[0-9]+);
+** mov\.u64 (%r[0-9]+), 16;
+** add\.u64 (%r[0-9]+), \3, -1;
+** add\.u64 (%r[0-9]+), \1, \4;
+** div\.u64 (%r[0-9]+), \5, 16;
+** mul\.lo\.u64 (%r[0-9]+), \6, 16;
+** call \((%r[0-9]+)\), __GCC_nvptx__PTX_alloca_not_supported, \(\7\);
+** ...
+*/
+
+/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */