registers, %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11 can be used
freely. Integers and pointers are returned in %rax.
+At entry, it is required that %rsp == 8 (mod 16).
+
Registers	May be		Argument
		clobbered	number
stack for the four register arguments is also required. %xmm6 to
%xmm15 are callee-saved. The "long" type is just 32 bits.
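
For illustration (not itself part of the diff), this is the register
shuffle needed so that code written for the ELF convention sees its
first four arguments where it expects them; the order matters, since
each W64 argument register has to be read before it is overwritten,
and %rdi/%rsi must be saved first, as they are callee-saved under W64:

	mov	%rcx, %rdi	# W64 argument 1 -> ELF argument 1
	mov	%rdx, %rsi	# W64 argument 2 -> ELF argument 2
	mov	%r8, %rdx	# W64 argument 3 -> ELF argument 3
	mov	%r9, %rcx	# W64 argument 4 -> ELF argument 4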
-If we have five arguments, and push the additional callee-save
+If we have six arguments, and push the additional callee-save
registers %rdi and %rsi on the stack, we get a stack frame like
+64(%rsp): Sixth argument
56(%rsp): Fifth argument
48(%rsp): Space for fourth argument
40(%rsp): Space for third argument
32(%rsp): Space for second argument
24(%rsp): Space for first argument
16(%rsp): Return address
-8(%rsp): Saved %rsi
-(%rsp) : Saved %rdi
+8(%rsp): Saved %rdi
+(%rsp) : Saved %rsi
+
+If, in addition, we use more than 6 %xmm registers, the callee-saved
+ones (%xmm6 and up) are saved on the stack *after* pushing %rdi (but
+before pushing %rsi), so that they are stored at 16-byte aligned
+addresses.
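
For example (illustration only), for a function taking two arguments
and using %xmm6 and %xmm7, i.e. W64_ENTRY(2, 8), the new entry code
below expands to roughly

	push	%rdi			# %rsp now 16-byte aligned
	sub	$32, %rsp		# 16*(8 - 6) bytes for %xmm6, %xmm7
	movdqa	%xmm6, 0(%rsp)
	movdqa	%xmm7, 16(%rsp)
	mov	%rcx, %rdi		# first argument
	push	%rsi			# breaks 16-byte alignment again
	mov	%rdx, %rsi		# second argument

which gives a frame like the one above, but with the 32-byte %xmm
save area between the saved %rsi and the saved %rdi.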
dnl W64_ENTRY(nargs, xmm_used)
define(<W64_ENTRY>, <
changequote([,])dnl
- ifelse(<<<<<<<<<<<<<<<< ignored; only for balancing)
+ ifelse(<<<<<<<<<<<<<<<<<< ignored; only for balancing)
ifelse(W64_ABI,yes,[
+ dnl unconditionally push %rdi, making %rsp 16-byte aligned
+ push %rdi
+ dnl Save %xmm6, ..., if needed
ifelse(eval($2 > 6), 1, [
- sub [$]eval(8 + 16*($2 - 6)), %rsp
- movdqu %xmm6, 0(%rsp)
+ sub [$]eval(16*($2 - 6)), %rsp
+ movdqa %xmm6, 0(%rsp)
])
ifelse(eval($2 > 7), 1, [
- movdqu %xmm7, 16(%rsp)
+ movdqa %xmm7, 16(%rsp)
])
ifelse(eval($2 > 8), 1, [
- movdqu %xmm8, 32(%rsp)
+ movdqa %xmm8, 32(%rsp)
])
ifelse(eval($2 > 9), 1, [
- movdqu %xmm9, 48(%rsp)
+ movdqa %xmm9, 48(%rsp)
])
ifelse(eval($2 > 10), 1, [
- movdqu %xmm10, 64(%rsp)
+ movdqa %xmm10, 64(%rsp)
])
ifelse(eval($2 > 11), 1, [
- movdqu %xmm11, 80(%rsp)
+ movdqa %xmm11, 80(%rsp)
])
ifelse(eval($2 > 12), 1, [
- movdqu %xmm12, 96(%rsp)
+ movdqa %xmm12, 96(%rsp)
])
ifelse(eval($2 > 13), 1, [
- movdqu %xmm13, 112(%rsp)
+ movdqa %xmm13, 112(%rsp)
])
ifelse(eval($2 > 14), 1, [
- movdqu %xmm14, 128(%rsp)
+ movdqa %xmm14, 128(%rsp)
])
ifelse(eval($2 > 15), 1, [
- movdqu %xmm15, 144(%rsp)
+ movdqa %xmm15, 144(%rsp)
])
+ dnl Move around arguments
ifelse(eval($1 >= 1), 1, [
- push %rdi
mov %rcx, %rdi
])
ifelse(eval($1 >= 2), 1, [
+ dnl NOTE: Breaks 16-byte %rsp alignment
push %rsi
mov %rdx, %rsi
])
mov %r9, %rcx
])
ifelse(eval($1 >= 5), 1, [
- ifelse(eval($2 > 6), 1, [
- mov eval(8 + 16*($2 - 6) + 56)(%rsp), %r8
- ], [
- mov 56(%rsp), %r8
- ])
+ mov ifelse(eval($2 > 6), 1, eval(16*($2-6)+56),56)(%rsp), %r8
+ ])
+ ifelse(eval($1 >= 6), 1, [
+ mov ifelse(eval($2 > 6), 1, eval(16*($2-6)+64),64)(%rsp), %r9
])
])
changequote(<,>)dnl
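
As a sanity check on the new stack-argument offsets (illustration
only): on entry, the fifth argument is at 40(%rsp), just above the
return address and the 32 bytes of shadow space. With, say, 7 %xmm
registers in use, the entry code then pushes %rdi (8 bytes),
allocates the %xmm save area (16*(7-6) = 16 bytes) and pushes %rsi
(8 bytes), so the fifth argument ends up at 40 + 8 + 16 + 8 =
72(%rsp), which is what eval(16*($2-6)+56) yields for $2 = 7; the
sixth argument is 8 bytes further up, at 80(%rsp).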
dnl W64_EXIT(nargs, xmm_used)
define(<W64_EXIT>, <
changequote([,])dnl
- ifelse(<<<<<<<<<<<< ignored; only for balancing)
+ ifelse(<<<<<<<<<<< ignored; only for balancing)
ifelse(W64_ABI,yes,[
ifelse(eval($1 >= 2), 1, [
pop %rsi
- ])
- ifelse(eval($1 >= 1), 1, [
- pop %rdi
- ])
+ ])
ifelse(eval($2 > 15), 1, [
- movdqu 144(%rsp), %xmm15
+ movdqa 144(%rsp), %xmm15
])
ifelse(eval($2 > 14), 1, [
- movdqu 128(%rsp), %xmm14
+ movdqa 128(%rsp), %xmm14
])
ifelse(eval($2 > 13), 1, [
- movdqu 112(%rsp), %xmm13
+ movdqa 112(%rsp), %xmm13
])
ifelse(eval($2 > 12), 1, [
- movdqu 96(%rsp), %xmm12
+ movdqa 96(%rsp), %xmm12
])
ifelse(eval($2 > 11), 1, [
- movdqu 80(%rsp), %xmm11
+ movdqa 80(%rsp), %xmm11
])
ifelse(eval($2 > 10), 1, [
- movdqu 64(%rsp), %xmm10
+ movdqa 64(%rsp), %xmm10
])
ifelse(eval($2 > 9), 1, [
- movdqu 48(%rsp), %xmm9
+ movdqa 48(%rsp), %xmm9
])
ifelse(eval($2 > 8), 1, [
- movdqu 32(%rsp), %xmm8
+ movdqa 32(%rsp), %xmm8
])
ifelse(eval($2 > 7), 1, [
- movdqu 16(%rsp), %xmm7
+ movdqa 16(%rsp), %xmm7
])
ifelse(eval($2 > 6), 1, [
- movdqu 0(%rsp), %xmm6
- add [$]eval(8 + 16*($2 - 6)), %rsp
+ movdqa (%rsp), %xmm6
+ add [$]eval(16*($2 - 6)), %rsp
])
+ pop %rdi
])
changequote(<,>)dnl
>)
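
The matching exit sequence, W64_EXIT(2, 8), then expands to roughly
(again, illustration only)

	pop	%rsi
	movdqa	16(%rsp), %xmm7
	movdqa	(%rsp), %xmm6
	add	$32, %rsp
	pop	%rdi

undoing the entry code in reverse, so that %rsp ends up pointing at
the return address again.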