Update shared m4 files and x86_64.
divert(-1)
-changequote(<,>)dnl
-dnl (progn (modify-syntax-entry ?< "(>") (modify-syntax-entry ?> ")<") )
dnl FORTRAN style comment character
-define(<C>, <
-dnl>)dnl
+define(`C', `
+dnl')dnl
dnl Disable m4 comment processing, since the default, #, is used for
dnl constants on some architectures, in particular ARM.
changecom()dnl
dnl Including files from the srcdir
-define(<include_src>, <include(srcdir/$1)>)dnl
+define(`include_src', `include(srcdir/$1)')dnl
dnl default definition, changed in fat builds
dnl (fat_transform is the identity here; C_NAME(name) expands to
dnl SYMBOL_PREFIX followed by fat_transform(name))
-define(<fat_transform>, <$1>)
-define(<C_NAME>, <SYMBOL_PREFIX<>fat_transform($1)>)
+define(`fat_transform', `$1')
+define(`C_NAME', `SYMBOL_PREFIX`'fat_transform($1)')
dnl Pseudo ops
dnl DECLARE_FUNC(symbol)
dnl Emits the directives that mark symbol as a function: a .type line
dnl using TYPE_FUNCTION when ELF_STYLE is yes, a .def/.scl/.type/.endef
dnl sequence when COFF_STYLE is yes, and nothing otherwise.
-define(<DECLARE_FUNC>,
-<ifelse(ELF_STYLE,yes,
-<.type $1,TYPE_FUNCTION>,
+define(`DECLARE_FUNC',
+`ifelse(ELF_STYLE,yes,
+`.type $1,TYPE_FUNCTION',
COFF_STYLE, yes,
-<.def $1
+`.def $1
.scl 2
.type 32
-.endef>,
-<>)>)
+.endef',
+`')')
dnl GMP_NUMB_BITS(n)
dnl Defined with an empty body, so an invocation such as
dnl GMP_NUMB_BITS(64) expands to nothing.
-define(<GMP_NUMB_BITS>,<>)dnl
+define(`GMP_NUMB_BITS',`')dnl
dnl PROLOGUE(name)
dnl Starts an exported function: emits .globl for C_NAME(name), the
dnl DECLARE_FUNC annotation for that symbol, and the entry label
dnl followed by ASM_X86_ENDBR.
-define(<PROLOGUE>,
-<.globl C_NAME($1)
+define(`PROLOGUE',
+`.globl C_NAME($1)
DECLARE_FUNC(C_NAME($1))
-C_NAME($1): ASM_X86_ENDBR>)
+C_NAME($1): ASM_X86_ENDBR')
dnl EPILOGUE(name)
dnl Ends a function. When ELF_STYLE is yes it emits a .size directive
dnl measuring from the C_NAME(name) label to the current location;
dnl otherwise it expands to nothing.
-define(<EPILOGUE>,
-<ifelse(ELF_STYLE,yes,
-<.size C_NAME($1), . - C_NAME($1)>,<>)>)
+define(`EPILOGUE',
+`ifelse(ELF_STYLE,yes,
+`.size C_NAME($1), . - C_NAME($1)',`')')
dnl m4_log2(n)
dnl Expands to the base-2 logarithm of n when n is a power of two.
dnl m4_log2_internal(n, p, e) compares n with p = 2^e for e = 0, 1, ...
dnl doubling p on each step. Once e reaches 10 it gives up and expands
dnl to the text not-a-power-of-two, so only n = 1, 2, 4, ..., 512 are
dnl resolved to a number.
-define(<m4_log2>, <m4_log2_internal($1,1,0)>)
-define(<m4_log2_internal>,
-<ifelse($3, 10, <not-a-power-of-two>,
+define(`m4_log2', `m4_log2_internal($1,1,0)')
+define(`m4_log2_internal',
+`ifelse($3, 10, `not-a-power-of-two',
$1, $2, $3,
-<m4_log2_internal($1, eval(2*$2), eval(1 + $3))>)>)
+`m4_log2_internal($1, eval(2*$2), eval(1 + $3))')')
dnl Argument to ALIGN is always in bytes, and converted to a
dnl logarithmic .align if necessary.
-define(<ALIGN>,
-<.align ifelse(ALIGN_LOG,yes,<m4_log2($1)>,$1)
->)
+define(`ALIGN',
+`.align ifelse(ALIGN_LOG,yes,`m4_log2($1)',$1)
+')
dnl IF_BE(big_endian_code, little_endian_code)
dnl Expands to the first argument when WORDS_BIGENDIAN is yes and to the
dnl second when it is no. Any other value prints a message with errprint
dnl and terminates m4 with exit status 1.
dnl IF_LE(little_endian_code, big_endian_code)
dnl Same selection as IF_BE with the two arguments swapped.
-define(<IF_BE>, <ifelse(
-WORDS_BIGENDIAN,yes,<$1>,
-WORDS_BIGENDIAN,no,<$2>,
-<errprint(<Unsupported endianness value>,WORDS_BIGENDIAN,<
->)
- m4exit(1)>)>)
-define(<IF_LE>, <IF_BE(<$2>, <$1>)>)
+define(`IF_BE', `ifelse(
+WORDS_BIGENDIAN,yes,`$1',
+WORDS_BIGENDIAN,no,`$2',
+`errprint(`Unsupported endianness value',WORDS_BIGENDIAN,`
+')
+ m4exit(1)')')
+define(`IF_LE', `IF_BE(`$2', `$1')')
dnl Struct defining macros
dnl STRUCTURE(prefix)
-define(<STRUCTURE>, <define(<SOFFSET>, 0)define(<SPREFIX>, <$1>)>)dnl
+define(`STRUCTURE', `define(`SOFFSET', 0)define(`SPREFIX', `$1')')dnl
dnl STRUCT(name, size)
-define(<STRUCT>,
-<define(SPREFIX<_>$1, SOFFSET)dnl
- define(<SOFFSET>, eval(SOFFSET + ($2)))>)dnl
+define(`STRUCT',
+`define(SPREFIX`_'$1, SOFFSET)dnl
+ define(`SOFFSET', eval(SOFFSET + ($2)))')dnl
dnl UCHAR(name)
-define(<UCHAR>, <STRUCT(<$1>, 1)>)dnl
+define(`UCHAR', `STRUCT(`$1', 1)')dnl
dnl UNSIGNED(name)
-define(<UNSIGNED>, <STRUCT(<$1>, 4)>)dnl
+define(`UNSIGNED', `STRUCT(`$1', 4)')dnl
dnl Offsets in arcfour_ctx
STRUCTURE(ARCFOUR)
-define(<srcdir>, <<@srcdir@>>)dnl
-define(<SYMBOL_PREFIX>, <@ASM_SYMBOL_PREFIX@><$1>)dnl
-define(<ELF_STYLE>, <@ASM_ELF_STYLE@>)dnl
-define(<COFF_STYLE>, <@ASM_COFF_STYLE@>)dnl
-define(<TYPE_FUNCTION>, <@ASM_TYPE_FUNCTION@>)dnl
-define(<TYPE_PROGBITS>, <@ASM_TYPE_PROGBITS@>)dnl
-define(<ALIGN_LOG>, <@ASM_ALIGN_LOG@>)dnl
-define(<W64_ABI>, <@W64_ABI@>)dnl
-define(<RODATA>, <@ASM_RODATA@>)dnl
-define(<WORDS_BIGENDIAN>, <@ASM_WORDS_BIGENDIAN@>)dnl
-define(<ASM_X86_ENDBR>,<@ASM_X86_ENDBR@>)dnl
-define(<ASM_X86_MARK_CET_ALIGN>,<@ASM_X86_MARK_CET_ALIGN@>)dnl
+define(`srcdir', ``@srcdir@'')dnl
+define(`SYMBOL_PREFIX', `@ASM_SYMBOL_PREFIX@'`$1')dnl
+define(`ELF_STYLE', `@ASM_ELF_STYLE@')dnl
+define(`COFF_STYLE', `@ASM_COFF_STYLE@')dnl
+define(`TYPE_FUNCTION', `@ASM_TYPE_FUNCTION@')dnl
+define(`TYPE_PROGBITS', `@ASM_TYPE_PROGBITS@')dnl
+define(`ALIGN_LOG', `@ASM_ALIGN_LOG@')dnl
+define(`W64_ABI', `@W64_ABI@')dnl
+define(`RODATA', `@ASM_RODATA@')dnl
+define(`WORDS_BIGENDIAN', `@ASM_WORDS_BIGENDIAN@')dnl
+define(`ASM_X86_ENDBR',`@ASM_X86_ENDBR@')dnl
+define(`ASM_X86_MARK_CET_ALIGN',`@ASM_X86_MARK_CET_ALIGN@')dnl
divert(1)
@ASM_X86_MARK_CET@
@ASM_MARK_NOEXEC_STACK@
C x86_64/aes-decrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2001, 2002, 2005, Rafael R. Sevilla, Niels Möller
Copyright (C) 2008, 2013 Niels Möller
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-include_src(<x86_64/aes.m4>)
+include_src(`x86_64/aes.m4')
C Register usage:
C AES state, use two of them
-define(<SA>,<%eax>)
-define(<SB>,<%ebx>)
-define(<SC>,<%ecx>)
-define(<SD>,<%edx>)
+define(`SA',`%eax')
+define(`SB',`%ebx')
+define(`SC',`%ecx')
+define(`SD',`%edx')
-define(<TA>,<%r10d>)
-define(<TB>,<%r11d>)
-define(<TC>,<%r12d>)
+define(`TA',`%r10d')
+define(`TB',`%r11d')
+define(`TC',`%r12d')
C Input argument
-define(<ROUNDS>, <%rdi>)
-define(<KEYS>, <%rsi>)
-define(<PARAM_TABLE>, <%rdx>)
-define(<PARAM_LENGTH>,<%rcx>)
-define(<DST>, <%r8>)
-define(<SRC>, <%r9>)
+define(`ROUNDS', `%rdi')
+define(`KEYS', `%rsi')
+define(`PARAM_TABLE', `%rdx')
+define(`PARAM_LENGTH',`%rcx')
+define(`DST', `%r8')
+define(`SRC', `%r9')
-define(<TABLE>, <%r13>)
-define(<LENGTH>,<%r14>)
-define(<KEY>, <%r15>)
+define(`TABLE', `%r13')
+define(`LENGTH',`%r14')
+define(`KEY', `%r15')
C Must correspond to an old-style register, for movzb from %ah--%dh to
C work.
-define(<TMP>,<%rbp>)
+define(`TMP',`%rbp')
.file "aes-decrypt-internal.asm"
C x86_64/aes-encrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2001, 2002, 2005, Rafael R. Sevilla, Niels Möller
Copyright (C) 2008, 2013 Niels Möller
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-include_src(<x86_64/aes.m4>)
+include_src(`x86_64/aes.m4')
C Register usage:
C AES state, use two of them
-define(<SA>,<%eax>)
-define(<SB>,<%ebx>)
-define(<SC>,<%ecx>)
-define(<SD>,<%edx>)
+define(`SA',`%eax')
+define(`SB',`%ebx')
+define(`SC',`%ecx')
+define(`SD',`%edx')
-define(<TA>,<%r10d>)
-define(<TB>,<%r11d>)
-define(<TC>,<%r12d>)
+define(`TA',`%r10d')
+define(`TB',`%r11d')
+define(`TC',`%r12d')
C Input argument
-define(<ROUNDS>, <%rdi>)
-define(<KEYS>, <%rsi>)
-define(<PARAM_TABLE>, <%rdx>)
-define(<PARAM_LENGTH>,<%rcx>)
-define(<DST>, <%r8>)
-define(<SRC>, <%r9>)
+define(`ROUNDS', `%rdi')
+define(`KEYS', `%rsi')
+define(`PARAM_TABLE', `%rdx')
+define(`PARAM_LENGTH',`%rcx')
+define(`DST', `%r8')
+define(`SRC', `%r9')
-define(<TABLE>, <%r13>)
-define(<LENGTH>,<%r14>)
-define(<KEY>, <%r15>)
+define(`TABLE', `%r13')
+define(`LENGTH',`%r14')
+define(`KEY', `%r15')
C Must correspond to an old-style register, for movzb from %ah--%dh to
C work.
-define(<TMP>,<%rbp>)
+define(`TMP',`%rbp')
.file "aes-encrypt-internal.asm"
dnl LREG(reg) gives the 8-bit register corresponding to the given 32-bit register.
-define(<LREG>,<ifelse(
+define(`LREG',`ifelse(
$1, %eax, %al,
$1, %ebx, %bl,
$1, %ecx, %cl,
$1, %r12d, %r12b,
$1, %r13d, %r13b,
$1, %r14d, %r14b,
- $1, %r15d, %r15b)>)dnl
+ $1, %r15d, %r15b)')dnl
dnl HREG(reg) gives the high-byte register (%ah, %bh, %ch or %dh)
dnl corresponding to the given 32-bit register. Any other argument
dnl expands to the word error.
-define(<HREG>,<ifelse(
+define(`HREG',`ifelse(
$1, %eax, %ah,
$1, %ebx, %bh,
$1, %ecx, %ch,
$1, %edx, %dh,
- error)>)
+ error)')
dnl XREG(reg) gives the 32-bit register corresponding to the given 64-bit register.
-define(<XREG>,<ifelse(
+define(`XREG',`ifelse(
$1, %rax, %eax,
$1, %rbx, %ebx,
$1, %rcx, %ecx,
$1, %r12,%r12d,
$1, %r13,%r13d,
$1, %r14,%r14d,
- $1, %r15,%r15d)>)dnl
+ $1, %r15,%r15d)')dnl
dnl AES_LOAD(a, b, c, d, src, key)
dnl Loads the next block of data from src, and add the subkey pointed
dnl to by key.
dnl Note that x86 allows unaligned accesses.
dnl Would it be preferable to interleave the loads and stores?
-define(<AES_LOAD>, <
+define(`AES_LOAD', `
movl ($5),$1
movl 4($5),$2
movl 8($5),$3
xorl ($6),$1
xorl 4($6),$2
xorl 8($6),$3
- xorl 12($6),$4>)dnl
+ xorl 12($6),$4')dnl
dnl AES_STORE(a, b, c, d, key, dst)
dnl Adds the subkey to a, b, c, d,
dnl and stores the result in the area pointed to by dst.
dnl Note that x86 allows unaligned accesses.
dnl Would it be preferable to interleave the loads and stores?
-define(<AES_STORE>, <
+define(`AES_STORE', `
xorl ($5),$1
xorl 4($5),$2
xorl 8($5),$3
movl $1,($6)
movl $2,4($6)
movl $3,8($6)
- movl $4,12($6)>)dnl
+ movl $4,12($6)')dnl
dnl AES_ROUND(table,a,b,c,d,out,ptr)
dnl Computes one word of the AES round. Leaves result in $6.
-define(<AES_ROUND>, <
+define(`AES_ROUND', `
movzb LREG($2), $7
movl AES_TABLE0 ($1, $7, 4),$6
movzb HREG($3), XREG($7)
xorl AES_TABLE1 ($1, $7, 4),$6
movl $4,XREG($7)
- shr <$>16,$7
- and <$>0xff,$7
+ shr `$'16,$7
+ and `$'0xff,$7
xorl AES_TABLE2 ($1, $7, 4),$6
movl $5,XREG($7)
- shr <$>24,$7
- xorl AES_TABLE3 ($1, $7, 4),$6>)dnl
+ shr `$'24,$7
+ xorl AES_TABLE3 ($1, $7, 4),$6')dnl
dnl AES_FINAL_ROUND(a, b, c, d, table, out, tmp)
dnl Computes one word of the final round. Leaves result in $6. Also
dnl performs the first substitution step, on the least significant
dnl byte, and rotates 8 bits.
-define(<AES_FINAL_ROUND>, <
+define(`AES_FINAL_ROUND', `
movzb LREG($1),$7
movzbl ($5, $7), $6
movl $2,XREG($7)
- andl <$>0x0000ff00,XREG($7)
+ andl `$'0x0000ff00,XREG($7)
orl XREG($7), $6
movl $3,XREG($7)
- andl <$>0x00ff0000,XREG($7)
+ andl `$'0x00ff0000,XREG($7)
orl XREG($7), $6
movl $4,XREG($7)
- andl <$>0xff000000,XREG($7)
+ andl `$'0xff000000,XREG($7)
orl XREG($7), $6
- roll <$>8, $6>)dnl
+ roll `$'8, $6')dnl
dnl AES_SUBST_BYTE(A, B, C, D, table, tmp)
dnl Substitutes the least significant byte of
dnl each of eax, ebx, ecx and edx, and also rotates
dnl the words one byte to the left.
dnl Uses that AES_SBOX == 0
-define(<AES_SUBST_BYTE>, <
+define(`AES_SUBST_BYTE', `
movzb LREG($1),$6
movb ($5, $6),LREG($1)
- roll <$>8,$1
+ roll `$'8,$1
movzb LREG($2),$6
movb ($5, $6),LREG($2)
- roll <$>8,$2
+ roll `$'8,$2
movzb LREG($3),$6
movb ($5, $6),LREG($3)
- roll <$>8,$3
+ roll `$'8,$3
movzb LREG($4),$6
movb ($5, $6),LREG($4)
- roll <$>8,$4>)dnl
+ roll `$'8,$4')dnl
C x86_64/aesni/aes-decrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015, 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Input argument
-define(<ROUNDS>, <%rdi>)
-define(<KEYS>, <%rsi>)
-C define(<TABLE>, <%rdx>) C Unused here
-define(<LENGTH>,<%rcx>)
-define(<DST>, <%r8>)
-define(<SRC>, <%r9>)
-
-define(<KEY0>, <%xmm0>)
-define(<KEY1>, <%xmm1>)
-define(<KEY2>, <%xmm2>)
-define(<KEY3>, <%xmm3>)
-define(<KEY4>, <%xmm4>)
-define(<KEY5>, <%xmm5>)
-define(<KEY6>, <%xmm6>)
-define(<KEY7>, <%xmm7>)
-define(<KEY8>, <%xmm8>)
-define(<KEY9>, <%xmm9>)
-define(<KEY10>, <%xmm10>)
-define(<KEY11>, <%xmm11>)
-define(<KEY12>, <%xmm12>)
-define(<KEY13>, <%xmm13>)
-define(<KEYLAST>, <%xmm14>)
-define(<BLOCK>, <%xmm15>)
+define(`ROUNDS', `%rdi')
+define(`KEYS', `%rsi')
+C define(`TABLE', `%rdx') C Unused here
+define(`LENGTH',`%rcx')
+define(`DST', `%r8')
+define(`SRC', `%r9')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`KEY13', `%xmm13')
+define(`KEYLAST', `%xmm14')
+define(`BLOCK', `%xmm15')
.file "aes-decrypt-internal.asm"
C x86_64/aesni/aes-encrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015, 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Input argument
-define(<ROUNDS>, <%rdi>)
-define(<KEYS>, <%rsi>)
-C define(<TABLE>, <%rdx>) C Unused here
-define(<LENGTH>,<%rcx>)
-define(<DST>, <%r8>)
-define(<SRC>, <%r9>)
-
-define(<KEY0>, <%xmm0>)
-define(<KEY1>, <%xmm1>)
-define(<KEY2>, <%xmm2>)
-define(<KEY3>, <%xmm3>)
-define(<KEY4>, <%xmm4>)
-define(<KEY5>, <%xmm5>)
-define(<KEY6>, <%xmm6>)
-define(<KEY7>, <%xmm7>)
-define(<KEY8>, <%xmm8>)
-define(<KEY9>, <%xmm9>)
-define(<KEY10>, <%xmm10>)
-define(<KEY11>, <%xmm11>)
-define(<KEY12>, <%xmm12>)
-define(<KEY13>, <%xmm13>)
-define(<KEYLAST>, <%xmm14>)
-define(<BLOCK>, <%xmm15>)
+define(`ROUNDS', `%rdi')
+define(`KEYS', `%rsi')
+C define(`TABLE', `%rdx') C Unused here
+define(`LENGTH',`%rcx')
+define(`DST', `%r8')
+define(`SRC', `%r9')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`KEY13', `%xmm13')
+define(`KEYLAST', `%xmm14')
+define(`BLOCK', `%xmm15')
.file "aes-encrypt-internal.asm"
C x86_64/camellia-crypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2010, Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Performance, cycles per block
C
C Register usage:
-define(<NKEYS>, <%rdi>)
-define(<KEYS>, <%rsi>)
-define(<TABLE>, <%rdx>)
-define(<LENGTH>, <%rcx>)
-define(<DST>, <%r8>)
-define(<SRC>, <%r9>)
+define(`NKEYS', `%rdi')
+define(`KEYS', `%rsi')
+define(`TABLE', `%rdx')
+define(`LENGTH', `%rcx')
+define(`DST', `%r8')
+define(`SRC', `%r9')
C Camellia state
-define(<I0>, <%rax>)
-define(<I1>, <%rbx>) C callee-save
-define(<KEY>, <%r13>) C callee-save
-define(<TMP>, <%rbp>) C callee-save
-define(<CNT>, <%r10>)
-define(<IL>, <%r11>)
-define(<IR>, <%r12>) C callee-save
-
-define(<SP1110>, <(TABLE,$1,4)>)
-define(<SP0222>, <1024(TABLE,$1,4)>)
-define(<SP3033>, <2048(TABLE,$1,4)>)
-define(<SP4404>, <3072(TABLE,$1,4)>)
+define(`I0', `%rax')
+define(`I1', `%rbx') C callee-save
+define(`KEY', `%r13') C callee-save
+define(`TMP', `%rbp') C callee-save
+define(`CNT', `%r10')
+define(`IL', `%r11')
+define(`IR', `%r12') C callee-save
+
+define(`SP1110', `(TABLE,$1,4)')
+define(`SP0222', `1024(TABLE,$1,4)')
+define(`SP3033', `2048(TABLE,$1,4)')
+define(`SP4404', `3072(TABLE,$1,4)')
C ROUND(x, y, key-offset)
-define(<ROUND>, <
+define(`ROUND', `
C Byte 0,1
movzbl LREG($1), XREG(TMP)
movl SP1110(TMP), XREG(IR)
movzbl HREG($1), XREG(TMP)
xorl SP4404(TMP), XREG(IR)
- ror <$>32, $1
+ ror `$'32, $1
C Byte 4,5
movzbl LREG($1), XREG(TMP)
movl SP4404(TMP), XREG(IL)
movzbl HREG($1), XREG(TMP)
xorl SP3033(TMP), XREG(IL)
- rol <$>16, $1
+ rol `$'16, $1
C Byte 2,3
movzbl LREG($1), XREG(TMP)
xorl SP3033(TMP), XREG(IR)
movzbl HREG($1), XREG(TMP)
xorl SP0222(TMP), XREG(IR)
- ror <$>32, $1
+ ror `$'32, $1
C Byte 6,7
movzbl LREG($1), XREG(TMP)
xorl SP0222(TMP), XREG(IL)
movzbl HREG($1), XREG(TMP)
xorl SP1110(TMP), XREG(IL)
- ror <$>16, $1
+ ror `$'16, $1
C 76543210
xorl XREG(IL), XREG(IR)
- rorl <$>8, XREG(IL)
+ rorl `$'8, XREG(IL)
xorl XREG(IR), XREG(IL)
- shl <$>32, IR
+ shl `$'32, IR
or IL, IR
xor $3(KEY), $2
xor IR, $2
->)
+')
C FL(x, key-offset)
-define(<FL>, <
+define(`FL', `
mov $1, TMP
- shr <$>32, TMP
+ shr `$'32, TMP
andl $2 + 4(KEY), XREG(TMP)
- roll <$>1, XREG(TMP)
+ roll `$'1, XREG(TMP)
C xorl XREG(TMP), XREG($1)
xor TMP, $1
movl $2(KEY), XREG(TMP)
orl XREG($1), XREG(TMP)
- shl <$>32, TMP
+ shl `$'32, TMP
xor TMP, $1
->)
+')
C FLINV(x0, key-offset)
-define(<FLINV>, <
+define(`FLINV', `
movl $2(KEY), XREG(TMP)
orl XREG($1), XREG(TMP)
- shl <$>32, TMP
+ shl `$'32, TMP
xor TMP, $1
mov $1, TMP
- shr <$>32, TMP
+ shr `$'32, TMP
andl $2 + 4(KEY), XREG(TMP)
- roll <$>1, XREG(TMP)
+ roll `$'1, XREG(TMP)
C xorl XREG(TMP), XREG($1)
xor TMP, $1
->)
+')
.file "camellia-crypt-internal.asm"
C x86_64/chacha-core-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2012, 2014 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<DST>, <%rdi>)
-define(<SRC>, <%rsi>)
-define(<COUNT>, <%rdx>)
-define(<X0>, <%xmm0>)
-define(<X1>, <%xmm1>)
-define(<X2>, <%xmm2>)
-define(<X3>, <%xmm3>)
-define(<T0>, <%xmm4>)
-define(<T1>, <%xmm5>)
+define(`DST', `%rdi')
+define(`SRC', `%rsi')
+define(`COUNT', `%rdx')
+define(`X0', `%xmm0')
+define(`X1', `%xmm1')
+define(`X2', `%xmm2')
+define(`X3', `%xmm3')
+define(`T0', `%xmm4')
+define(`T1', `%xmm5')
C USE_PSHUFW selects which ROTL_BY_16 definition below is installed:
C yes picks the pshufhw/pshuflw variant, which touches only REG; any
C other value picks the pslld/psrld/por variant, which also uses TMP.
-define(<USE_PSHUFW>, <yes>)
+define(`USE_PSHUFW', `yes')
C ROTL_BY_16(REG, TMP)
-ifelse(USE_PSHUFW, <yes>, <
-define(<ROTL_BY_16>, <
- pshufhw <$>0xb1, $1, $1
- pshuflw <$>0xb1, $1, $1
->)>, <
-define(<ROTL_BY_16>, <
- pslld <$>16, $1
- psrld <$>16, $2
+ifelse(USE_PSHUFW, `yes', `
+define(`ROTL_BY_16', `
+ pshufhw `$'0xb1, $1, $1
+ pshuflw `$'0xb1, $1, $1
+')', `
+define(`ROTL_BY_16', `
+ pslld `$'16, $1
+ psrld `$'16, $2
por $2, $1
->)
->)
+')
+')
C QROUND(x0, x1, x2, x3)
-define(<QROUND>, <
+define(`QROUND', `
paddd $2, $1
pxor $1, $4
movaps $4, T0
paddd $4, $3
pxor $3, $2
movaps $2, T0
- pslld <$>12, $2
- psrld <$>20, T0
+ pslld `$'12, $2
+ psrld `$'20, T0
por T0, $2
paddd $2, $1
pxor $1, $4
movaps $4, T0
- pslld <$>8, $4
- psrld <$>24, T0
+ pslld `$'8, $4
+ psrld `$'24, T0
por T0, $4
paddd $4, $3
pxor $3, $2
movaps $2, T0
- pslld <$>7, $2
- psrld <$>25, T0
+ pslld `$'7, $2
+ psrld `$'25, T0
por T0, $2
->)
+')
C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
.text
C x86_64/ecc-25519-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2014 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-25519-modp.asm"
-define(<RP>, <%rsi>)
-define(<U0>, <%rdi>) C Overlaps unused modulo input
-define(<U1>, <%rcx>)
-define(<U2>, <%r8>)
-define(<U3>, <%r9>)
-define(<T0>, <%r10>)
-define(<T1>, <%r11>)
-define(<M>, <%rbx>)
+define(`RP', `%rsi')
+define(`U0', `%rdi') C Overlaps unused modulo input
+define(`U1', `%rcx')
+define(`U2', `%r8')
+define(`U3', `%r9')
+define(`T0', `%r10')
+define(`T1', `%r11')
+define(`M', `%rbx')
PROLOGUE(_nettle_ecc_curve25519_modp)
W64_ENTRY(2, 0)
C x86_64/ecc-curve448-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2019 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-curve448-modp.asm"
-define(<RP>, <%rsi>)
-define(<X0>, <%rax>)
-define(<X1>, <%rbx>)
-define(<X2>, <%rcx>)
-define(<X3>, <%rdx>)
-define(<X4>, <%rbp>)
-define(<X5>, <%rdi>)
-define(<X6>, <%r8>)
-define(<X7>, <%r9>)
-define(<T0>, <%r10>)
-define(<T1>, <%r11>)
-define(<T2>, <%r12>)
+define(`RP', `%rsi')
+define(`X0', `%rax')
+define(`X1', `%rbx')
+define(`X2', `%rcx')
+define(`X3', `%rdx')
+define(`X4', `%rbp')
+define(`X5', `%rdi')
+define(`X6', `%r8')
+define(`X7', `%r9')
+define(`T0', `%r10')
+define(`T1', `%r11')
+define(`T2', `%r12')
PROLOGUE(_nettle_ecc_curve448_modp)
W64_ENTRY(2, 0)
C x86_64/ecc-secp192r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp192r1-modp.asm"
-define(<RP>, <%rsi>)
-define(<T0>, <%rdi>) C Overlaps unused modulo input
-define(<T1>, <%rcx>)
-define(<T2>, <%rdx>)
-define(<T3>, <%r8>)
-define(<H>, <%r9>)
-define(<C1>, <%r10>)
-define(<C2>, <%r11>)
+define(`RP', `%rsi')
+define(`T0', `%rdi') C Overlaps unused modulo input
+define(`T1', `%rcx')
+define(`T2', `%rdx')
+define(`T3', `%r8')
+define(`H', `%r9')
+define(`C1', `%r10')
+define(`C2', `%r11')
C ecc_secp192r1_modp (const struct ecc_modulo *m, mp_limb_t *rp)
.text
C x86_64/ecc-secp224r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp224r1-modp.asm"
GMP_NUMB_BITS(64)
-define(<RP>, <%rsi>)
-define(<T0>, <%rdi>) C Overlaps unused modulo input
-define(<T1>, <%rcx>)
-define(<H0>, <%rax>)
-define(<H1>, <%rdx>)
-define(<H2>, <%r8>)
-define(<F0>, <%r9>)
-define(<F1>, <%r10>)
-define(<F2>, <%r11>)
+define(`RP', `%rsi')
+define(`T0', `%rdi') C Overlaps unused modulo input
+define(`T1', `%rcx')
+define(`H0', `%rax')
+define(`H1', `%rdx')
+define(`H2', `%r8')
+define(`F0', `%r9')
+define(`F1', `%r10')
+define(`F2', `%r11')
C ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp)
PROLOGUE(_nettle_ecc_secp224r1_modp)
C x86_64/ecc-secp256r1-redc.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp256r1-redc.asm"
-define(<RP>, <%rsi>)
-define(<U0>, <%rdi>) C Overlaps unused modulo input
-define(<U1>, <%rcx>)
-define(<U2>, <%rax>)
-define(<U3>, <%rdx>)
-define(<U4>, <%r8>)
-define(<U5>, <%r9>)
-define(<U6>, <%r10>)
-define(<F0>, <%r11>)
-define(<F1>, <%r12>)
-define(<F2>, <%rbx>)
-define(<F3>, <%rbp>)
+define(`RP', `%rsi')
+define(`U0', `%rdi') C Overlaps unused modulo input
+define(`U1', `%rcx')
+define(`U2', `%rax')
+define(`U3', `%rdx')
+define(`U4', `%r8')
+define(`U5', `%r9')
+define(`U6', `%r10')
+define(`F0', `%r11')
+define(`F1', `%r12')
+define(`F2', `%rbx')
+define(`F3', `%rbp')
C FOLD(x), sets (F3,F2,F1,F0) <-- (x << 224) - (x << 128) - (x<<32)
-define(<FOLD>, <
+define(`FOLD', `
mov $1, F2
mov $1, F3
- shl <$>32, F2
- shr <$>32, F3
+ shl `$'32, F2
+ shr `$'32, F3
xor F0,F0
xor F1,F1
sub F2, F0
sbb F3, F1
sbb $1, F2
- sbb <$>0, F3
->)
+ sbb `$'0, F3
+')
PROLOGUE(_nettle_ecc_secp256r1_redc)
W64_ENTRY(2, 0)
C save all registers that need to be saved
C x86_64/ecc-secp384r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013, 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp384r1-modp.asm"
-define(<RP>, <%rsi>)
-define(<D5>, <%rax>)
-define(<T0>, <%rbx>)
-define(<T1>, <%rcx>)
-define(<T2>, <%rdx>)
-define(<T3>, <%rbp>)
-define(<T4>, <%rdi>)
-define(<T5>, <%r8>)
-define(<H0>, <%r9>)
-define(<H1>, <%r10>)
-define(<H2>, <%r11>)
-define(<H3>, <%r12>)
-define(<H4>, <%r13>)
-define(<H5>, <%r14>)
-define(<C2>, <%r15>)
-define(<C0>, H5) C Overlap
-define(<TMP>, RP) C Overlap
+define(`RP', `%rsi')
+define(`D5', `%rax')
+define(`T0', `%rbx')
+define(`T1', `%rcx')
+define(`T2', `%rdx')
+define(`T3', `%rbp')
+define(`T4', `%rdi')
+define(`T5', `%r8')
+define(`H0', `%r9')
+define(`H1', `%r10')
+define(`H2', `%r11')
+define(`H3', `%r12')
+define(`H4', `%r13')
+define(`H5', `%r14')
+define(`C2', `%r15')
+define(`C0', H5) C Overlap
+define(`TMP', RP) C Overlap
PROLOGUE(_nettle_ecc_secp384r1_modp)
C x86_64/ecc-secp521r1-modp.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "ecc-secp521r1-modp.asm"
GMP_NUMB_BITS(64)
-define(<RP>, <%rsi>)
-define(<U0>, <%rax>)
-define(<U1>, <%rbx>)
-define(<U2>, <%rcx>)
-define(<U3>, <%rdx>)
-define(<U4>, <%rbp>)
-define(<U5>, <%rdi>)
-define(<U6>, <%r8>)
-define(<U7>, <%r9>)
-define(<U8>, <%r10>)
-define(<U9>, <%r11>)
-define(<T0>, <%r12>)
-define(<T1>, <%r13>)
+define(`RP', `%rsi')
+define(`U0', `%rax')
+define(`U1', `%rbx')
+define(`U2', `%rcx')
+define(`U3', `%rdx')
+define(`U4', `%rbp')
+define(`U5', `%rdi')
+define(`U6', `%r8')
+define(`U7', `%r9')
+define(`U8', `%r10')
+define(`U9', `%r11')
+define(`T0', `%r12')
+define(`T1', `%r13')
PROLOGUE(_nettle_ecc_secp521r1_modp)
W64_ENTRY(2, 0)
C x86_64/fat/aes-decrypt-internal-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets an _aesni suffix, then include the implementation from
C x86_64/aesni/aes-decrypt-internal.asm.
-define(<fat_transform>, <$1_aesni>)
-include_src(<x86_64/aesni/aes-decrypt-internal.asm>)
+define(`fat_transform', `$1_aesni')
+include_src(`x86_64/aesni/aes-decrypt-internal.asm')
C x86_64/fat/aes-decrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets an _x86_64 suffix, then include the implementation from
C x86_64/aes-decrypt-internal.asm.
-define(<fat_transform>, <$1_x86_64>)
-include_src(<x86_64/aes-decrypt-internal.asm>)
+define(`fat_transform', `$1_x86_64')
+include_src(`x86_64/aes-decrypt-internal.asm')
C x86_64/fat/aes-encrypt-internal-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets an _aesni suffix, then include the implementation from
C x86_64/aesni/aes-encrypt-internal.asm.
-define(<fat_transform>, <$1_aesni>)
-include_src(<x86_64/aesni/aes-encrypt-internal.asm>)
+define(`fat_transform', `$1_aesni')
+include_src(`x86_64/aesni/aes-encrypt-internal.asm')
C x86_64/fat/aes-encrypt-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets an _x86_64 suffix, then include the implementation from
C x86_64/aes-encrypt-internal.asm.
-define(<fat_transform>, <$1_x86_64>)
-include_src(<x86_64/aes-encrypt-internal.asm>)
+define(`fat_transform', `$1_x86_64')
+include_src(`x86_64/aes-encrypt-internal.asm')
C x86_64/fat/cpuid.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Input argument
C cpuid input: %edi
C x86_64/fat/memxor-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets a leading underscore and an _sse2 suffix, define USE_SSE2 as
C yes, then include x86_64/memxor.asm.
-define(<fat_transform>, <_$1_sse2>)
-define(<USE_SSE2>, <yes>)
-include_src(<x86_64/memxor.asm>)
+define(`fat_transform', `_$1_sse2')
+define(`USE_SSE2', `yes')
+include_src(`x86_64/memxor.asm')
C x86_64/fat/memxor.asm
-ifelse(<
+ifelse(`
Copyright (C) 2015 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets a leading underscore and an _x86_64 suffix, then include
C x86_64/memxor.asm.
-define(<fat_transform>, <_$1_x86_64>)
-include_src(<x86_64/memxor.asm>)
+define(`fat_transform', `_$1_x86_64')
+include_src(`x86_64/memxor.asm')
C x86_64/fat/sha1-compress-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets a leading underscore and an _sha_ni suffix, then include
C x86_64/sha_ni/sha1-compress.asm.
-define(<fat_transform>, <_$1_sha_ni>)
-include_src(<x86_64/sha_ni/sha1-compress.asm>)
+define(`fat_transform', `_$1_sha_ni')
+include_src(`x86_64/sha_ni/sha1-compress.asm')
C x86_64/fat/sha1-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets a leading underscore and an _x86_64 suffix, then include
C x86_64/sha1-compress.asm.
-define(<fat_transform>, <_$1_x86_64>)
-include_src(<x86_64/sha1-compress.asm>)
+define(`fat_transform', `_$1_x86_64')
+include_src(`x86_64/sha1-compress.asm')
C x86_64/fat/sha256-compress-2.asm
-ifelse(<
+ifelse(`
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Fat build wrapper: redefine fat_transform so that every C_NAME symbol
C gets an _sha_ni suffix, then include
C x86_64/sha_ni/sha256-compress.asm.
-define(<fat_transform>, <$1_sha_ni>)
-include_src(<x86_64/sha_ni/sha256-compress.asm>)
+define(`fat_transform', `$1_sha_ni')
+include_src(`x86_64/sha_ni/sha256-compress.asm')
C x86_64/gcm-hash8.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Register usage:
-define(<KEY>, <%rdi>)
-define(<XP>, <%rsi>)
-define(<LENGTH>, <%rdx>)
-define(<SRC>, <%rcx>)
-define(<X0>, <%rax>)
-define(<X1>, <%rbx>)
-define(<CNT>, <%ebp>)
-define(<T0>, <%r8>)
-define(<T1>, <%r9>)
-define(<T2>, <%r10>)
-define(<Z0>, <%r11>)
-define(<Z1>, <%r12>)
-define(<SHIFT_TABLE>, <%r13>)
+define(`KEY', `%rdi')
+define(`XP', `%rsi')
+define(`LENGTH', `%rdx')
+define(`SRC', `%rcx')
+define(`X0', `%rax')
+define(`X1', `%rbx')
+define(`CNT', `%ebp')
+define(`T0', `%r8')
+define(`T1', `%r9')
+define(`T2', `%r10')
+define(`Z0', `%r11')
+define(`Z1', `%r12')
+define(`SHIFT_TABLE', `%r13')
.file "gcm-hash8.asm"
ret
EPILOGUE(_nettle_gcm_hash8)
C W(a, b) expands to a single hexadecimal constant: 0x followed by b
C and then a, i.e. the two arguments are emitted in swapped order.
-define(<W>, <0x$2$1>)
+define(`W', `0x$2$1')
RODATA
ALIGN(2)
C NOTE: Sun/Oracle assembler doesn't support ".short".
C OFFSET(i)
C Expands to 4*i, or to the empty string if i is zero
-define(<OFFSET>, <ifelse($1,0,,eval(4*$1))>)
+define(`OFFSET', `ifelse($1,0,,eval(4*$1))')
C OFFSET64(i)
C Expands to 8*i, or to the empty string if i is zero
-define(<OFFSET64>, <ifelse($1,0,,eval(8*$1))>)
+define(`OFFSET64', `ifelse($1,0,,eval(8*$1))')
dnl LREG(reg) gives the 8-bit register corresponding to the given 64-bit register.
-define(<LREG>,<ifelse(
+define(`LREG',`ifelse(
$1, %rax, %al,
$1, %rbx, %bl,
$1, %rcx, %cl,
$1, %r12, %r12b,
$1, %r13, %r13b,
$1, %r14, %r14b,
- $1, %r15, %r15b)>)dnl
+ $1, %r15, %r15b)')dnl
dnl HREG(reg) gives the high-byte register (%ah, %bh, %ch or %dh)
dnl corresponding to the given 64-bit register.
-define(<HREG>,<ifelse(
+define(`HREG',`ifelse(
$1, %rax, %ah,
$1, %rbx, %bh,
$1, %rcx, %ch,
- $1, %rdx, %dh)>)dnl
+ $1, %rdx, %dh)')dnl
dnl WREG(reg) gives the 16-bit register corresponding to the given 64-bit register.
-define(<WREG>,<ifelse(
+define(`WREG',`ifelse(
$1, %rax, %ax,
$1, %rbx, %bx,
$1, %rcx, %cx,
$1, %r12, %r12w,
$1, %r13, %r13w,
$1, %r14, %r14w,
- $1, %r15, %r15w)>)dnl
+ $1, %r15, %r15w)')dnl
dnl XREG(reg) gives the 32-bit register corresponding to the given 64-bit register.
-define(<XREG>,<ifelse(
+define(`XREG',`ifelse(
$1, %rax, %eax,
$1, %rbx, %ebx,
$1, %rcx, %ecx,
$1, %r12, %r12d,
$1, %r13, %r13d,
$1, %r14, %r14d,
- $1, %r15, %r15d)>)dnl
+ $1, %r15, %r15d)')dnl
dnl W64_ENTRY(nargs, xmm_used)
dnl Function entry glue; the body is emitted only when W64_ABI is yes.
dnl It pushes %rdi and then, when xmm_used > 6, reserves
dnl 16*(xmm_used - 6) bytes of stack and stores %xmm6 up to
dnl %xmm(xmm_used - 1) there. Finally the first nargs arguments are
dnl copied from %rcx, %rdx, %r8, %r9 and two stack slots into %rdi,
dnl %rsi, %rdx, %rcx, %r8 and %r9; %rsi is pushed before it is
dnl overwritten (nargs >= 2). The stack offsets used for arguments 5
dnl and 6 account for the xmm save area.
-define(<W64_ENTRY>, <
- changequote([,])dnl
- ifelse(<<<<<<<<<<<<<<<<<< ignored; only for balancing)
- ifelse(W64_ABI,yes,[
+define(`W64_ENTRY', `
+ ifelse(W64_ABI,yes,`
dnl unconditionally push %rdi, making %rsp 16-byte aligned
push %rdi
dnl Save %xmm6, ..., if needed
- ifelse(eval($2 > 6), 1, [
- sub [$]eval(16*($2 - 6)), %rsp
+ ifelse(eval($2 > 6), 1, `
+ sub `$'eval(16*($2 - 6)), %rsp
movdqa %xmm6, 0(%rsp)
- ])
- ifelse(eval($2 > 7), 1, [
+ ')
+ ifelse(eval($2 > 7), 1, `
movdqa %xmm7, 16(%rsp)
- ])
- ifelse(eval($2 > 8), 1, [
+ ')
+ ifelse(eval($2 > 8), 1, `
movdqa %xmm8, 32(%rsp)
- ])
- ifelse(eval($2 > 9), 1, [
+ ')
+ ifelse(eval($2 > 9), 1, `
movdqa %xmm9, 48(%rsp)
- ])
- ifelse(eval($2 > 10), 1, [
+ ')
+ ifelse(eval($2 > 10), 1, `
movdqa %xmm10, 64(%rsp)
- ])
- ifelse(eval($2 > 11), 1, [
+ ')
+ ifelse(eval($2 > 11), 1, `
movdqa %xmm11, 80(%rsp)
- ])
- ifelse(eval($2 > 12), 1, [
+ ')
+ ifelse(eval($2 > 12), 1, `
movdqa %xmm12, 96(%rsp)
- ])
- ifelse(eval($2 > 13), 1, [
+ ')
+ ifelse(eval($2 > 13), 1, `
movdqa %xmm13, 112(%rsp)
- ])
- ifelse(eval($2 > 14), 1, [
+ ')
+ ifelse(eval($2 > 14), 1, `
movdqa %xmm14, 128(%rsp)
- ])
- ifelse(eval($2 > 15), 1, [
+ ')
+ ifelse(eval($2 > 15), 1, `
movdqa %xmm15, 144(%rsp)
- ])
+ ')
dnl Move around arguments
- ifelse(eval($1 >= 1), 1, [
+ ifelse(eval($1 >= 1), 1, `
mov %rcx, %rdi
- ])
- ifelse(eval($1 >= 2), 1, [
+ ')
+ ifelse(eval($1 >= 2), 1, `
dnl NOTE: Breaks 16-byte %rsp alignment
push %rsi
mov %rdx, %rsi
- ])
- ifelse(eval($1 >= 3), 1, [
+ ')
+ ifelse(eval($1 >= 3), 1, `
mov %r8, %rdx
- ])
- ifelse(eval($1 >= 4), 1, [
+ ')
+ ifelse(eval($1 >= 4), 1, `
mov %r9, %rcx
- ])
- ifelse(eval($1 >= 5), 1, [
+ ')
+ ifelse(eval($1 >= 5), 1, `
mov ifelse(eval($2 > 6), 1, eval(16*($2-6)+56),56)(%rsp), %r8
- ])
- ifelse(eval($1 >= 6), 1, [
+ ')
+ ifelse(eval($1 >= 6), 1, `
mov ifelse(eval($2 > 6), 1, eval(16*($2-6)+64),64)(%rsp), %r9
- ])
- ])
- changequote(<,>)dnl
->)
+ ')
+ ')
+')
dnl W64_EXIT(nargs, xmm_used)
-define(<W64_EXIT>, <
- changequote([,])dnl
- ifelse(<<<<<<<<<<< ignored; only for balancing)
- ifelse(W64_ABI,yes,[
- ifelse(eval($1 >= 2), 1, [
+dnl When W64_ABI is yes, expands to the matching epilogue: pops %rsi
+dnl (when nargs >= 2), reloads %xmm15 down to %xmm6 from the stack and
+dnl releases that stack space (only when xmm_used is greater than 6),
+dnl and finally pops %rdi. For any other value of W64_ABI no
+dnl instructions are emitted.
+define(`W64_EXIT', `
+ ifelse(W64_ABI,yes,`
+ ifelse(eval($1 >= 2), 1, `
pop %rsi
- ])
- ifelse(eval($2 > 15), 1, [
+ ')
+ ifelse(eval($2 > 15), 1, `
movdqa 144(%rsp), %xmm15
- ])
- ifelse(eval($2 > 14), 1, [
+ ')
+ ifelse(eval($2 > 14), 1, `
movdqa 128(%rsp), %xmm14
- ])
- ifelse(eval($2 > 13), 1, [
+ ')
+ ifelse(eval($2 > 13), 1, `
movdqa 112(%rsp), %xmm13
- ])
- ifelse(eval($2 > 12), 1, [
+ ')
+ ifelse(eval($2 > 12), 1, `
movdqa 96(%rsp), %xmm12
- ])
- ifelse(eval($2 > 11), 1, [
+ ')
+ ifelse(eval($2 > 11), 1, `
movdqa 80(%rsp), %xmm11
- ])
- ifelse(eval($2 > 10), 1, [
+ ')
+ ifelse(eval($2 > 10), 1, `
movdqa 64(%rsp), %xmm10
- ])
- ifelse(eval($2 > 9), 1, [
+ ')
+ ifelse(eval($2 > 9), 1, `
movdqa 48(%rsp), %xmm9
- ])
- ifelse(eval($2 > 8), 1, [
+ ')
+ ifelse(eval($2 > 8), 1, `
movdqa 32(%rsp), %xmm8
- ])
- ifelse(eval($2 > 7), 1, [
+ ')
+ ifelse(eval($2 > 7), 1, `
movdqa 16(%rsp), %xmm7
- ])
- ifelse(eval($2 > 6), 1, [
+ ')
+ ifelse(eval($2 > 6), 1, `
movdqa (%rsp), %xmm6
- add [$]eval(16*($2 - 6)), %rsp
- ])
+ add `$'eval(16*($2 - 6)), %rsp
+ ')
pop %rdi
- ])
- changequote(<,>)dnl
->)
+ ')
+')
C x86_64/md5-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2005, 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Registers:
-define(<STATE>, <%rdi>)
-define(<INPUT>, <%rsi>)
-define(<SA>,<%rax>)
-define(<SB>,<%rbx>)
-define(<SC>,<%rcx>)
-define(<SD>,<%rbp>)
-define(<TMP>, <%r8>)
+define(`STATE', `%rdi')
+define(`INPUT', `%rsi')
+define(`SA',`%rax')
+define(`SB',`%rbx')
+define(`SC',`%rcx')
+define(`SD',`%rbp')
+define(`TMP', `%r8')
C F1(x,y,z) = (z ^ (x & (y ^ z)))
-define(<F1>, <
+define(`F1', `
movl XREG($3), XREG(TMP)
xorl XREG($2), XREG(TMP)
andl XREG($1), XREG(TMP)
- xorl XREG($3), XREG(TMP)>)
+ xorl XREG($3), XREG(TMP)')
-define(<F2>,<F1($3, $1, $2)>)
+C F2(x,y,z) = F1(z,x,y) = (y ^ (z & (x ^ y)))
+define(`F2',`F1($3, $1, $2)')
C F3(x,y,z) = x ^ y ^ z
-define(<F3>,<
+define(`F3',`
movl XREG($1), XREG(TMP)
xorl XREG($2), XREG(TMP)
- xorl XREG($3), XREG(TMP)>)
+ xorl XREG($3), XREG(TMP)')
C F4(x,y,z) = y ^ (x | ~z)
-define(<F4>,<
+define(`F4',`
movl XREG($3), XREG(TMP)
notl XREG(TMP)
orl XREG($1), XREG(TMP)
- xorl XREG($2), XREG(TMP)>)
+ xorl XREG($2), XREG(TMP)')
C Index to 4*i, or to the empty string if zero
-define(<REF>,<ifelse($1,0,,eval(4*$1))(INPUT)>)
+define(`REF',`ifelse($1,0,,eval(4*$1))(INPUT)')
C ROUND(f, w, x, y, z, k, data, s):
C w += f(x,y,z) + data + k
C w <<< s
C w += x
-define(<ROUND>,<
- addl <$>$7, XREG($2)
+define(`ROUND',`
+ addl `$'$7, XREG($2)
$1($3, $4, $5)
addl $6, XREG($2)
addl XREG(TMP), XREG($2)
- roll <$>$8, XREG($2)
- addl XREG($3), XREG($2)>)
+ roll `$'$8, XREG($2)
+ addl XREG($3), XREG($2)')
.file "md5-compress.asm"
movl 8(STATE), XREG(SC)
movl 12(STATE), XREG(SD)
- ROUND(<F1>, SA, SB, SC, SD, REF( 0), 0xd76aa478, 7)
- ROUND(<F1>, SD, SA, SB, SC, REF( 1), 0xe8c7b756, 12)
- ROUND(<F1>, SC, SD, SA, SB, REF( 2), 0x242070db, 17)
- ROUND(<F1>, SB, SC, SD, SA, REF( 3), 0xc1bdceee, 22)
- ROUND(<F1>, SA, SB, SC, SD, REF( 4), 0xf57c0faf, 7)
- ROUND(<F1>, SD, SA, SB, SC, REF( 5), 0x4787c62a, 12)
- ROUND(<F1>, SC, SD, SA, SB, REF( 6), 0xa8304613, 17)
- ROUND(<F1>, SB, SC, SD, SA, REF( 7), 0xfd469501, 22)
- ROUND(<F1>, SA, SB, SC, SD, REF( 8), 0x698098d8, 7)
- ROUND(<F1>, SD, SA, SB, SC, REF( 9), 0x8b44f7af, 12)
- ROUND(<F1>, SC, SD, SA, SB, REF(10), 0xffff5bb1, 17)
- ROUND(<F1>, SB, SC, SD, SA, REF(11), 0x895cd7be, 22)
- ROUND(<F1>, SA, SB, SC, SD, REF(12), 0x6b901122, 7)
- ROUND(<F1>, SD, SA, SB, SC, REF(13), 0xfd987193, 12)
- ROUND(<F1>, SC, SD, SA, SB, REF(14), 0xa679438e, 17)
- ROUND(<F1>, SB, SC, SD, SA, REF(15), 0x49b40821, 22)
-
- ROUND(<F2>, SA, SB, SC, SD, REF( 1), 0xf61e2562, 5)
- ROUND(<F2>, SD, SA, SB, SC, REF( 6), 0xc040b340, 9)
- ROUND(<F2>, SC, SD, SA, SB, REF(11), 0x265e5a51, 14)
- ROUND(<F2>, SB, SC, SD, SA, REF( 0), 0xe9b6c7aa, 20)
- ROUND(<F2>, SA, SB, SC, SD, REF( 5), 0xd62f105d, 5)
- ROUND(<F2>, SD, SA, SB, SC, REF(10), 0x02441453, 9)
- ROUND(<F2>, SC, SD, SA, SB, REF(15), 0xd8a1e681, 14)
- ROUND(<F2>, SB, SC, SD, SA, REF( 4), 0xe7d3fbc8, 20)
- ROUND(<F2>, SA, SB, SC, SD, REF( 9), 0x21e1cde6, 5)
- ROUND(<F2>, SD, SA, SB, SC, REF(14), 0xc33707d6, 9)
- ROUND(<F2>, SC, SD, SA, SB, REF( 3), 0xf4d50d87, 14)
- ROUND(<F2>, SB, SC, SD, SA, REF( 8), 0x455a14ed, 20)
- ROUND(<F2>, SA, SB, SC, SD, REF(13), 0xa9e3e905, 5)
- ROUND(<F2>, SD, SA, SB, SC, REF( 2), 0xfcefa3f8, 9)
- ROUND(<F2>, SC, SD, SA, SB, REF( 7), 0x676f02d9, 14)
- ROUND(<F2>, SB, SC, SD, SA, REF(12), 0x8d2a4c8a, 20)
-
- ROUND(<F3>, SA, SB, SC, SD, REF( 5), 0xfffa3942, 4)
- ROUND(<F3>, SD, SA, SB, SC, REF( 8), 0x8771f681, 11)
- ROUND(<F3>, SC, SD, SA, SB, REF(11), 0x6d9d6122, 16)
- ROUND(<F3>, SB, SC, SD, SA, REF(14), 0xfde5380c, 23)
- ROUND(<F3>, SA, SB, SC, SD, REF( 1), 0xa4beea44, 4)
- ROUND(<F3>, SD, SA, SB, SC, REF( 4), 0x4bdecfa9, 11)
- ROUND(<F3>, SC, SD, SA, SB, REF( 7), 0xf6bb4b60, 16)
- ROUND(<F3>, SB, SC, SD, SA, REF(10), 0xbebfbc70, 23)
- ROUND(<F3>, SA, SB, SC, SD, REF(13), 0x289b7ec6, 4)
- ROUND(<F3>, SD, SA, SB, SC, REF( 0), 0xeaa127fa, 11)
- ROUND(<F3>, SC, SD, SA, SB, REF( 3), 0xd4ef3085, 16)
- ROUND(<F3>, SB, SC, SD, SA, REF( 6), 0x04881d05, 23)
- ROUND(<F3>, SA, SB, SC, SD, REF( 9), 0xd9d4d039, 4)
- ROUND(<F3>, SD, SA, SB, SC, REF(12), 0xe6db99e5, 11)
- ROUND(<F3>, SC, SD, SA, SB, REF(15), 0x1fa27cf8, 16)
- ROUND(<F3>, SB, SC, SD, SA, REF( 2), 0xc4ac5665, 23)
-
- ROUND(<F4>, SA, SB, SC, SD, REF( 0), 0xf4292244, 6)
- ROUND(<F4>, SD, SA, SB, SC, REF( 7), 0x432aff97, 10)
- ROUND(<F4>, SC, SD, SA, SB, REF(14), 0xab9423a7, 15)
- ROUND(<F4>, SB, SC, SD, SA, REF( 5), 0xfc93a039, 21)
- ROUND(<F4>, SA, SB, SC, SD, REF(12), 0x655b59c3, 6)
- ROUND(<F4>, SD, SA, SB, SC, REF( 3), 0x8f0ccc92, 10)
- ROUND(<F4>, SC, SD, SA, SB, REF(10), 0xffeff47d, 15)
- ROUND(<F4>, SB, SC, SD, SA, REF( 1), 0x85845dd1, 21)
- ROUND(<F4>, SA, SB, SC, SD, REF( 8), 0x6fa87e4f, 6)
- ROUND(<F4>, SD, SA, SB, SC, REF(15), 0xfe2ce6e0, 10)
- ROUND(<F4>, SC, SD, SA, SB, REF( 6), 0xa3014314, 15)
- ROUND(<F4>, SB, SC, SD, SA, REF(13), 0x4e0811a1, 21)
- ROUND(<F4>, SA, SB, SC, SD, REF( 4), 0xf7537e82, 6)
- ROUND(<F4>, SD, SA, SB, SC, REF(11), 0xbd3af235, 10)
- ROUND(<F4>, SC, SD, SA, SB, REF( 2), 0x2ad7d2bb, 15)
- ROUND(<F4>, SB, SC, SD, SA, REF( 9), 0xeb86d391, 21)
+ ROUND(`F1', SA, SB, SC, SD, REF( 0), 0xd76aa478, 7)
+ ROUND(`F1', SD, SA, SB, SC, REF( 1), 0xe8c7b756, 12)
+ ROUND(`F1', SC, SD, SA, SB, REF( 2), 0x242070db, 17)
+ ROUND(`F1', SB, SC, SD, SA, REF( 3), 0xc1bdceee, 22)
+ ROUND(`F1', SA, SB, SC, SD, REF( 4), 0xf57c0faf, 7)
+ ROUND(`F1', SD, SA, SB, SC, REF( 5), 0x4787c62a, 12)
+ ROUND(`F1', SC, SD, SA, SB, REF( 6), 0xa8304613, 17)
+ ROUND(`F1', SB, SC, SD, SA, REF( 7), 0xfd469501, 22)
+ ROUND(`F1', SA, SB, SC, SD, REF( 8), 0x698098d8, 7)
+ ROUND(`F1', SD, SA, SB, SC, REF( 9), 0x8b44f7af, 12)
+ ROUND(`F1', SC, SD, SA, SB, REF(10), 0xffff5bb1, 17)
+ ROUND(`F1', SB, SC, SD, SA, REF(11), 0x895cd7be, 22)
+ ROUND(`F1', SA, SB, SC, SD, REF(12), 0x6b901122, 7)
+ ROUND(`F1', SD, SA, SB, SC, REF(13), 0xfd987193, 12)
+ ROUND(`F1', SC, SD, SA, SB, REF(14), 0xa679438e, 17)
+ ROUND(`F1', SB, SC, SD, SA, REF(15), 0x49b40821, 22)
+
+ ROUND(`F2', SA, SB, SC, SD, REF( 1), 0xf61e2562, 5)
+ ROUND(`F2', SD, SA, SB, SC, REF( 6), 0xc040b340, 9)
+ ROUND(`F2', SC, SD, SA, SB, REF(11), 0x265e5a51, 14)
+ ROUND(`F2', SB, SC, SD, SA, REF( 0), 0xe9b6c7aa, 20)
+ ROUND(`F2', SA, SB, SC, SD, REF( 5), 0xd62f105d, 5)
+ ROUND(`F2', SD, SA, SB, SC, REF(10), 0x02441453, 9)
+ ROUND(`F2', SC, SD, SA, SB, REF(15), 0xd8a1e681, 14)
+ ROUND(`F2', SB, SC, SD, SA, REF( 4), 0xe7d3fbc8, 20)
+ ROUND(`F2', SA, SB, SC, SD, REF( 9), 0x21e1cde6, 5)
+ ROUND(`F2', SD, SA, SB, SC, REF(14), 0xc33707d6, 9)
+ ROUND(`F2', SC, SD, SA, SB, REF( 3), 0xf4d50d87, 14)
+ ROUND(`F2', SB, SC, SD, SA, REF( 8), 0x455a14ed, 20)
+ ROUND(`F2', SA, SB, SC, SD, REF(13), 0xa9e3e905, 5)
+ ROUND(`F2', SD, SA, SB, SC, REF( 2), 0xfcefa3f8, 9)
+ ROUND(`F2', SC, SD, SA, SB, REF( 7), 0x676f02d9, 14)
+ ROUND(`F2', SB, SC, SD, SA, REF(12), 0x8d2a4c8a, 20)
+
+ ROUND(`F3', SA, SB, SC, SD, REF( 5), 0xfffa3942, 4)
+ ROUND(`F3', SD, SA, SB, SC, REF( 8), 0x8771f681, 11)
+ ROUND(`F3', SC, SD, SA, SB, REF(11), 0x6d9d6122, 16)
+ ROUND(`F3', SB, SC, SD, SA, REF(14), 0xfde5380c, 23)
+ ROUND(`F3', SA, SB, SC, SD, REF( 1), 0xa4beea44, 4)
+ ROUND(`F3', SD, SA, SB, SC, REF( 4), 0x4bdecfa9, 11)
+ ROUND(`F3', SC, SD, SA, SB, REF( 7), 0xf6bb4b60, 16)
+ ROUND(`F3', SB, SC, SD, SA, REF(10), 0xbebfbc70, 23)
+ ROUND(`F3', SA, SB, SC, SD, REF(13), 0x289b7ec6, 4)
+ ROUND(`F3', SD, SA, SB, SC, REF( 0), 0xeaa127fa, 11)
+ ROUND(`F3', SC, SD, SA, SB, REF( 3), 0xd4ef3085, 16)
+ ROUND(`F3', SB, SC, SD, SA, REF( 6), 0x04881d05, 23)
+ ROUND(`F3', SA, SB, SC, SD, REF( 9), 0xd9d4d039, 4)
+ ROUND(`F3', SD, SA, SB, SC, REF(12), 0xe6db99e5, 11)
+ ROUND(`F3', SC, SD, SA, SB, REF(15), 0x1fa27cf8, 16)
+ ROUND(`F3', SB, SC, SD, SA, REF( 2), 0xc4ac5665, 23)
+
+ ROUND(`F4', SA, SB, SC, SD, REF( 0), 0xf4292244, 6)
+ ROUND(`F4', SD, SA, SB, SC, REF( 7), 0x432aff97, 10)
+ ROUND(`F4', SC, SD, SA, SB, REF(14), 0xab9423a7, 15)
+ ROUND(`F4', SB, SC, SD, SA, REF( 5), 0xfc93a039, 21)
+ ROUND(`F4', SA, SB, SC, SD, REF(12), 0x655b59c3, 6)
+ ROUND(`F4', SD, SA, SB, SC, REF( 3), 0x8f0ccc92, 10)
+ ROUND(`F4', SC, SD, SA, SB, REF(10), 0xffeff47d, 15)
+ ROUND(`F4', SB, SC, SD, SA, REF( 1), 0x85845dd1, 21)
+ ROUND(`F4', SA, SB, SC, SD, REF( 8), 0x6fa87e4f, 6)
+ ROUND(`F4', SD, SA, SB, SC, REF(15), 0xfe2ce6e0, 10)
+ ROUND(`F4', SC, SD, SA, SB, REF( 6), 0xa3014314, 15)
+ ROUND(`F4', SB, SC, SD, SA, REF(13), 0x4e0811a1, 21)
+ ROUND(`F4', SA, SB, SC, SD, REF( 4), 0xf7537e82, 6)
+ ROUND(`F4', SD, SA, SB, SC, REF(11), 0xbd3af235, 10)
+ ROUND(`F4', SC, SD, SA, SB, REF( 2), 0x2ad7d2bb, 15)
+ ROUND(`F4', SB, SC, SD, SA, REF( 9), 0xeb86d391, 21)
C Update the state vector
addl XREG(SA), (STATE)
C x86_64/memxor.asm
-ifelse(<
+ifelse(`
Copyright (C) 2010, 2014, Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Register usage:
-define(<DST>, <%rax>) C Originally in %rdi
-define(<SRC>, <%rsi>)
-define(<N>, <%rdx>)
-define(<TMP>, <%r8>)
-define(<TMP2>, <%r9>)
-define(<CNT>, <%rdi>)
-define(<S0>, <%r11>)
-define(<S1>, <%rdi>) C Overlaps with CNT
+define(`DST', `%rax') C Originally in %rdi
+define(`SRC', `%rsi')
+define(`N', `%rdx')
+define(`TMP', `%r8')
+define(`TMP2', `%r9')
+define(`CNT', `%rdi')
+define(`S0', `%r11')
+define(`S1', `%rdi') C Overlaps with CNT
-define(<USE_SSE2>, <no>)
+define(`USE_SSE2', `no')
.file "memxor.asm"
jnz .Lalign_loop
.Laligned:
-ifdef(<USE_SSE2>, <
+C NOTE(review): ifdef only tests whether USE_SSE2 is defined, and it is
+C defined (as no) earlier in this file, so this block appears to be
+C included unconditionally. TODO confirm that this is intended.
+ifdef(`USE_SSE2', `
cmp $16, N
jnc .Lsse2_case
->)
+')
C Next destination word is -8(DST, N)
C Setup for unrolling
W64_EXIT(3, 0)
ret
-ifdef(<USE_SSE2>, <
+ifdef(`USE_SSE2', `
.Lsse2_case:
lea (DST, N), TMP
W64_EXIT(3, 0)
ret
->)
+')
EPILOGUE(nettle_memxor)
C x86_64/memxor3.asm
-ifelse(<
+ifelse(`
Copyright (C) 2010, 2014 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Register usage:
-define(<DST>, <%rax>) C Originally in %rdi
-define(<AP>, <%rsi>)
-define(<BP>, <%rdx>)
-define(<N>, <%r10>)
-define(<TMP>, <%r8>)
-define(<TMP2>, <%r9>)
-define(<CNT>, <%rdi>)
-define(<S0>, <%r11>)
-define(<S1>, <%rdi>) C Overlaps with CNT
-
-define(<USE_SSE2>, <no>)
+define(`DST', `%rax') C Originally in %rdi
+define(`AP', `%rsi')
+define(`BP', `%rdx')
+define(`N', `%r10')
+define(`TMP', `%r8')
+define(`TMP2', `%r9')
+define(`CNT', `%rdi')
+define(`S0', `%r11')
+define(`S1', `%rdi') C Overlaps with CNT
+
+define(`USE_SSE2', `no')
.file "memxor3.asm"
jnz .Lalign_loop
.Laligned:
-ifelse(USE_SSE2, yes, <
+ifelse(USE_SSE2, yes, `
cmp $16, N
jnc .Lsse2_case
->)
+')
C Check for the case that AP and BP have the same alignment,
C but different from DST.
mov AP, TMP
W64_EXIT(4, 0)
ret
-ifelse(USE_SSE2, yes, <
+C The SSE2 code path, emitted only when USE_SSE2 is yes.
+C Text inside the quoted argument below must not contain an apostrophe:
+C with the default m4 quote characters it would end the quoted string
+C early and leave a stray closing quote in the output.
+ifelse(USE_SSE2, yes, `
.Lsse2_case:
lea (DST, N), TMP
- C ENTRY might have been 3 args, too, but it doesn't matter for the exit
+ C ENTRY might have been 3 args, too, but it does not matter for the exit
W64_EXIT(4, 0)
ret
->)
+')
EPILOGUE(nettle_memxor3)
C x86_64/poly1305-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "poly1305-internal.asm"
C Registers mainly used by poly1305_block
-define(<CTX>, <%rdi>)
-define(<T0>, <%rcx>)
-define(<T1>, <%rsi>)
-define(<T2>, <%r8>)
-define(<H0>, <%r9>)
-define(<H1>, <%r10>)
-define(<H2>, <%r11>)
+define(`CTX', `%rdi')
+define(`T0', `%rcx')
+define(`T1', `%rsi')
+define(`T2', `%r8')
+define(`H0', `%r9')
+define(`H1', `%r10')
+define(`H2', `%r11')
C _poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
.text
adc $0, XREG(H2)
C Use %rax instead of %rsi
-define(<T1>, <%rax>)
+define(`T1', `%rax')
C Add 5, use result if >= 2^130
mov $5, T0
xor T1, T1
C x86_64/salsa20-2core.asm
-ifelse(<
+ifelse(`
Copyright (C) 2012, 2020 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<DST>, <%rdi>)
-define(<SRC>, <%rsi>)
-define(<COUNT>, <%rdx>)
+define(`DST', `%rdi')
+define(`SRC', `%rsi')
+define(`COUNT', `%rdx')
C State, even elements in X, odd elements in Y
-define(<X0>, <%xmm0>)
-define(<X1>, <%xmm1>)
-define(<X2>, <%xmm2>)
-define(<X3>, <%xmm3>)
-define(<Y0>, <%xmm4>)
-define(<Y1>, <%xmm5>)
-define(<Y2>, <%xmm6>)
-define(<Y3>, <%xmm7>)
-
-define(<T0>, <%xmm8>)
-define(<T1>, <%xmm9>)
-define(<T2>, <%xmm10>)
-define(<T3>, <%xmm11>)
-
-define(<M0011>, <%xmm12>)
-
-include_src(<x86_64/salsa20.m4>)
+define(`X0', `%xmm0')
+define(`X1', `%xmm1')
+define(`X2', `%xmm2')
+define(`X3', `%xmm3')
+define(`Y0', `%xmm4')
+define(`Y1', `%xmm5')
+define(`Y2', `%xmm6')
+define(`Y3', `%xmm7')
+
+define(`T0', `%xmm8')
+define(`T1', `%xmm9')
+define(`T2', `%xmm10')
+define(`T3', `%xmm11')
+
+define(`M0011', `%xmm12')
+
+include_src(`x86_64/salsa20.m4')
.text
ALIGN(16)
C x86_64/salsa20-core-internal.asm
-ifelse(<
+ifelse(`
Copyright (C) 2012 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
-
-define(<DST>, <%rdi>)
-define(<SRC>, <%rsi>)
-define(<COUNT>, <%rdx>)
-define(<X0>, <%xmm0>)
-define(<X1>, <%xmm1>)
-define(<X2>, <%xmm2>)
-define(<X3>, <%xmm3>)
-define(<T0>, <%xmm4>)
-define(<T1>, <%xmm5>)
-define(<M0101>, <%xmm6>)
-define(<M0110>, <%xmm7>)
-define(<M0011>, <%xmm8>)
-
-include_src(<x86_64/salsa20.m4>)
+')
+
+define(`DST', `%rdi')
+define(`SRC', `%rsi')
+define(`COUNT', `%rdx')
+define(`X0', `%xmm0')
+define(`X1', `%xmm1')
+define(`X2', `%xmm2')
+define(`X3', `%xmm3')
+define(`T0', `%xmm4')
+define(`T1', `%xmm5')
+define(`M0101', `%xmm6')
+define(`M0110', `%xmm7')
+define(`M0011', `%xmm8')
+
+include_src(`x86_64/salsa20.m4')
C _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds)
.text
C Needs T0 and T1
C QROUND(x0, x1, x2, x3)
-define(<QROUND>, <
+define(`QROUND', `
movaps $4, T0 C 0
paddd $1, T0 C 1
movaps T0, T1 C 2
- pslld <$>7, T0 C 2
- psrld <$>25, T1 C 3
+ pslld `$'7, T0 C 2
+ psrld `$'25, T1 C 3
pxor T0, $2 C 3
pxor T1, $2 C 4
movaps $1, T0 C 0
paddd $2, T0 C 5
movaps T0, T1 C 6
- pslld <$>9, T0 C 6
- psrld <$>23, T1 C 7
+ pslld `$'9, T0 C 6
+ psrld `$'23, T1 C 7
pxor T0, $3 C 7
pxor T1, $3 C 8
movaps $2, T0 C 0
paddd $3, T0 C 9
movaps T0, T1 C 10
- pslld <$>13, T0 C 10
- psrld <$>19, T1 C 11
+ pslld `$'13, T0 C 10
+ psrld `$'19, T1 C 11
pxor T0, $4 C 11
pxor T1, $4 C 12
movaps $3, T0 C 0
paddd $4, T0 C 13
movaps T0, T1 C 14
- pslld <$>18, T0 C 14
- psrld <$>14, T1 C 15
+ pslld `$'18, T0 C 14
+ psrld `$'14, T1 C 15
pxor T0, $1 C 15
pxor T1, $1 C 16
->)
+')
C SWAP(x0, x1, mask)
C Swaps bits in x0 and x1, with bits selected by the mask
-define(<SWAP>, <
+define(`SWAP', `
movaps $1, T0
pxor $2, $1
pand $3, $1
pxor $1, $2
pxor T0, $1
->)
+')
C x86_64/serpent-decrypt.asm
-ifelse(<
+ifelse(`
Copyright (C) 2011 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-include_src(<x86_64/serpent.m4>)
+include_src(`x86_64/serpent.m4')
C Register usage:
C Single block serpent state, two copies
-define(<x0>, <%eax>)
-define(<x1>, <%ebx>)
-define(<x2>, <%ebp>)
-define(<x3>, <%r8d>)
+define(`x0', `%eax')
+define(`x1', `%ebx')
+define(`x2', `%ebp')
+define(`x3', `%r8d')
-define(<y0>, <%r9d>)
-define(<y1>, <%r10d>)
-define(<y2>, <%r11d>)
-define(<y3>, <%r12d>)
+define(`y0', `%r9d')
+define(`y1', `%r10d')
+define(`y2', `%r11d')
+define(`y3', `%r12d')
C Quadruple block serpent state, two copies
-define(<X0>, <%xmm0>)
-define(<X1>, <%xmm1>)
-define(<X2>, <%xmm2>)
-define(<X3>, <%xmm3>)
-
-define(<Y0>, <%xmm4>)
-define(<Y1>, <%xmm5>)
-define(<Y2>, <%xmm6>)
-define(<Y3>, <%xmm7>)
-
-define(<MINUS1>, <%xmm8>)
-define(<T0>, <%xmm9>)
-define(<T1>, <%xmm10>)
-define(<T2>, <%xmm11>)
-define(<T3>, <%xmm12>)
+define(`X0', `%xmm0')
+define(`X1', `%xmm1')
+define(`X2', `%xmm2')
+define(`X3', `%xmm3')
+
+define(`Y0', `%xmm4')
+define(`Y1', `%xmm5')
+define(`Y2', `%xmm6')
+define(`Y3', `%xmm7')
+
+define(`MINUS1', `%xmm8')
+define(`T0', `%xmm9')
+define(`T1', `%xmm10')
+define(`T2', `%xmm11')
+define(`T3', `%xmm12')
C Arguments
-define(<CTX>, <%rdi>)
-define(<N>, <%rsi>)
-define(<DST>, <%rdx>)
-define(<SRC>, <%rcx>)
+define(`CTX', `%rdi')
+define(`N', `%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
-define(<CNT>, <%r13>)
-define(<TMP32>, <%r14d>)
+define(`CNT', `%r13')
+define(`TMP32', `%r14d')
C SBOX macros. Inputs $1 - $4 (destroyed), outputs $5 - $8
-define(<SBOX0I>, <
+define(`SBOX0I', `
mov $1, $5
xor $3, $5
mov $1, $7
xor $4, $8
or $4, $2
xor $2, $5
->)
+')
-define(<SBOX1I>, <
+define(`SBOX1I', `
mov $2, $6
or $4, $6
xor $3, $6
xor $3, $5
or $7, $1
xor $1, $5
->)
+')
-define(<SBOX2I>, <
+define(`SBOX2I', `
mov $1, $5
xor $4, $5
mov $3, $7
mov $5, $7
xor $6, $7
xor $3, $7
->)
+')
-define(<SBOX3I>, <
+define(`SBOX3I', `
mov $3, $8
or $4, $8
mov $2, $5
and $7, $1
or $2, $1
xor $1, $8
->)
+')
-define(<SBOX4I>, <
+define(`SBOX4I', `
mov $3, $6
xor $4, $6
mov $3, $7
xor $1, $5
xor $2, $1
xor $1, $7
->)
+')
-define(<SBOX5I>, <
+define(`SBOX5I', `
mov $1, $6
and $4, $6
mov $3, $8
not $2
or $1, $2
xor $2, $8
->)
+')
-define(<SBOX6I>, <
+define(`SBOX6I', `
mov $1, $7
xor $3, $7
not $3
and $5, $2
xor $2, $7
xor $4, $7
->)
+')
-define(<SBOX7I>, <
+define(`SBOX7I', `
mov $1, $8
and $2, $8
mov $2, $7
xor $1, $6
or $6, $4
xor $4, $5
->)
+')
-define(<LTI>, <
- rol <$>10, $3
- rol <$>27, $1
+C LTI(x0, x1, x2, x3)
+C Appears to be the inverse of the LT macro in serpent-encrypt.asm: the
+C same steps in reverse order, with each left rotation by n replaced by
+C a left rotation by 32 - n. Operates in place; TMP32 is scratch.
+define(`LTI', `
+ rol `$'10, $3
+ rol `$'27, $1
mov $2, TMP32
- shl <$>7, TMP32
+ shl `$'7, TMP32
xor $4, $3
xor TMP32, $3
xor $2, $1
xor $4, $1
- rol <$>25, $4
- rol <$>31, $2
+ rol `$'25, $4
+ rol `$'31, $2
mov $1, TMP32
- shl <$>3, TMP32
+ shl `$'3, TMP32
xor $3, $4
xor TMP32, $4
xor $1, $2
xor $3, $2
- rol <$>29, $3
- rol <$>19, $1
->)
+ rol `$'29, $3
+ rol `$'19, $1
+')
-define(<PNOT>, <
+define(`PNOT', `
pxor MINUS1, $1
->)
+')
-define(<WSBOX0I>, <
+define(`WSBOX0I', `
movdqa $1, $5
pxor $3, $5
movdqa $1, $7
pxor $4, $8
por $4, $2
pxor $2, $5
->)
+')
-define(<WSBOX1I>, <
+define(`WSBOX1I', `
movdqa $2, $6
por $4, $6
pxor $3, $6
pxor $3, $5
por $7, $1
pxor $1, $5
->)
+')
-define(<WSBOX2I>, <
+define(`WSBOX2I', `
movdqa $1, $5
pxor $4, $5
movdqa $3, $7
movdqa $5, $7
pxor $6, $7
pxor $3, $7
->)
+')
-define(<WSBOX3I>, <
+define(`WSBOX3I', `
movdqa $3, $8
por $4, $8
movdqa $2, $5
pand $7, $1
por $2, $1
pxor $1, $8
->)
+')
-define(<WSBOX4I>, <
+define(`WSBOX4I', `
movdqa $3, $6
pxor $4, $6
movdqa $3, $7
pxor $1, $5
pxor $2, $1
pxor $1, $7
->)
+')
-define(<WSBOX5I>, <
+define(`WSBOX5I', `
movdqa $1, $6
pand $4, $6
movdqa $3, $8
PNOT($2)
por $1, $2
pxor $2, $8
->)
+')
-define(<WSBOX6I>, <
+define(`WSBOX6I', `
movdqa $1, $7
pxor $3, $7
PNOT($3)
pand $5, $2
pxor $2, $7
pxor $4, $7
->)
+')
-define(<WSBOX7I>, <
+define(`WSBOX7I', `
movdqa $1, $8
pand $2, $8
movdqa $2, $7
pxor $1, $6
por $6, $4
pxor $4, $5
->)
+')
-define(<WLTI>, <
+C WLTI(x0, x1, x2, x3)
+C Counterpart of LTI operating on %xmm registers, with the rotations
+C done by WROL. Operates in place; T0 is scratch.
+define(`WLTI', `
WROL(10, $3)
WROL(27, $1)
movdqa $2, T0
- pslld <$>7, T0
+ pslld `$'7, T0
pxor $4, $3
pxor T0, $3
pxor $2, $1
WROL(25, $4)
WROL(31, $2)
movdqa $1, T0
- pslld <$>3, T0
+ pslld `$'3, T0
pxor $3, $4
pxor T0, $4
pxor $1, $2
pxor $3, $2
WROL(29, $3)
WROL(19, $1)
->)
+')
.file "serpent-decrypt.asm"
C x86_64/serpent-encrypt.asm
-ifelse(<
+ifelse(`
Copyright (C) 2011 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-include_src(<x86_64/serpent.m4>)
+include_src(`x86_64/serpent.m4')
C Register usage:
C Single block serpent state, two copies
-define(<x0>, <%eax>)
-define(<x1>, <%ebx>)
-define(<x2>, <%ebp>)
-define(<x3>, <%r8d>)
+define(`x0', `%eax')
+define(`x1', `%ebx')
+define(`x2', `%ebp')
+define(`x3', `%r8d')
-define(<y0>, <%r9d>)
-define(<y1>, <%r10d>)
-define(<y2>, <%r11d>)
-define(<y3>, <%r12d>)
+define(`y0', `%r9d')
+define(`y1', `%r10d')
+define(`y2', `%r11d')
+define(`y3', `%r12d')
C Quadruple block serpent state, two copies
-define(<X0>, <%xmm0>)
-define(<X1>, <%xmm1>)
-define(<X2>, <%xmm2>)
-define(<X3>, <%xmm3>)
-
-define(<Y0>, <%xmm4>)
-define(<Y1>, <%xmm5>)
-define(<Y2>, <%xmm6>)
-define(<Y3>, <%xmm7>)
-
-define(<MINUS1>, <%xmm8>)
-define(<T0>, <%xmm9>)
-define(<T1>, <%xmm10>)
-define(<T2>, <%xmm11>)
-define(<T3>, <%xmm12>)
+define(`X0', `%xmm0')
+define(`X1', `%xmm1')
+define(`X2', `%xmm2')
+define(`X3', `%xmm3')
+
+define(`Y0', `%xmm4')
+define(`Y1', `%xmm5')
+define(`Y2', `%xmm6')
+define(`Y3', `%xmm7')
+
+define(`MINUS1', `%xmm8')
+define(`T0', `%xmm9')
+define(`T1', `%xmm10')
+define(`T2', `%xmm11')
+define(`T3', `%xmm12')
C Arguments
-define(<CTX>, <%rdi>)
-define(<N>, <%rsi>)
-define(<DST>, <%rdx>)
-define(<SRC>, <%rcx>)
+define(`CTX', `%rdi')
+define(`N', `%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
-define(<CNT>, <%r13>)
-define(<TMP32>, <%r14d>)
+define(`CNT', `%r13')
+define(`TMP32', `%r14d')
C SBOX macros. Inputs $1 - $4 (destroyed), outputs $5 - $8
-define(<SBOX0>, <
+define(`SBOX0', `
mov $2, $8 C y3 = x1 ^ x2
xor $3, $8
mov $1, $5 C y0 = x0 | x3
mov $5, $6 C y1 = y0 ^ x1
xor $2, $6
xor $4, $6 C y1 ^= x3
->)
+')
-define(<SBOX1>, <
+define(`SBOX1', `
mov $1, $6 C y1 = x0 | x3
or $4, $6
mov $3, $7 C y2 = x2 ^ x3
not $8 C y3 = ~y3
and $4, $5 C y0 &= x3
xor $3, $5 C y0 ^= x2
->)
+')
-define(<SBOX2>, <
+define(`SBOX2', `
mov $1, $7 C y2 = x1 | x2
or $3, $7
mov $1, $6
xor $2, $7
not $8
xor $4, $7
->)
+')
-define(<SBOX3>, <
+define(`SBOX3', `
mov $1, $6
xor $3, $6
mov $1, $5
and $4, $2
mov $1, $5
xor $2, $5
->)
+')
-define(<SBOX4>, <
+define(`SBOX4', `
mov $1, $8
or $2, $8
mov $2, $7
and $4, $5
xor $3, $5
not $5
->)
+')
-define(<SBOX5>, <
+define(`SBOX5', `
mov $2, $5
or $4, $5
xor $3, $5
xor $7, $6
or $4, $7
xor $2, $7
->)
+')
-define(<SBOX6>, <
+define(`SBOX6', `
mov $1, $5
xor $4, $5
mov $1, $6
not $7
xor $7, $5
xor $1, $5
->)
+')
-define(<SBOX7>, <
+define(`SBOX7', `
mov $1, $5
and $3, $5
mov $2, $8
not $4 C t02
or $4, $5
xor $3, $5
->)
+')
-define(<LT>, <
- rol <$>13, $1
- rol <$>3, $3
+C LT(x0, x1, x2, x3)
+C Mixes the four 32-bit registers in place using rotates, shifts and
+C xors; TMP32 is scratch. Scalar counterpart of WLT. The rol without an
+C explicit count rotates by one bit (WLT uses WROL with count 1 there).
+define(`LT', `
+ rol `$'13, $1
+ rol `$'3, $3
xor $1, $2
xor $3, $2
mov $1, TMP32
- shl <$>3, TMP32
+ shl `$'3, TMP32
xor $3, $4
xor TMP32, $4
rol $2
- rol <$>7, $4
+ rol `$'7, $4
xor $2, $1
xor $4, $1
mov $2, TMP32
- shl <$>7, TMP32
+ shl `$'7, TMP32
xor $4, $3
xor TMP32, $3
- rol <$>5, $1
- rol <$>22, $3
->)
+ rol `$'5, $1
+ rol `$'22, $3
+')
C Parallel operation on four blocks at a time.
C pnot instruction is missing. For lack of a spare register, XOR with
C constant in memory.
-define(<PNOT>, <
+define(`PNOT', `
pxor MINUS1, $1
->)
+')
-define(<WSBOX0>, <
+define(`WSBOX0', `
movdqa $2, $8 C y3 = x1 ^ x2
pxor $3, $8
movdqa $1, $5 C y0 = x0 | x3
movdqa $5, $6 C y1 = y0 ^ x1
pxor $2, $6
pxor $4, $6 C y1 ^= x3
->)
+')
-define(<WSBOX1>, <
+define(`WSBOX1', `
movdqa $1, $6 C y1 = x0 | x3
por $4, $6
movdqa $3, $7 C y2 = x2 ^ x3
PNOT($8) C y3 = ~y3
pand $4, $5 C y0 &= x3
pxor $3, $5 C y0 ^= x2
->)
+')
-define(<WSBOX2>, <
+define(`WSBOX2', `
movdqa $1, $7 C y2 = x1 | x2
por $3, $7
movdqa $1, $6
pxor $2, $7
PNOT($8)
pxor $4, $7
->)
+')
-define(<WSBOX3>, <
+define(`WSBOX3', `
movdqa $1, $6
pxor $3, $6
movdqa $1, $5
pand $4, $2
movdqa $1, $5
pxor $2, $5
->)
+')
-define(<WSBOX4>, <
+define(`WSBOX4', `
movdqa $1, $8
por $2, $8
movdqa $2, $7
pand $4, $5
pxor $3, $5
PNOT($5)
->)
+')
-define(<WSBOX5>, <
+define(`WSBOX5', `
movdqa $2, $5
por $4, $5
pxor $3, $5
pxor $7, $6
por $4, $7
pxor $2, $7
->)
+')
-define(<WSBOX6>, <
+define(`WSBOX6', `
movdqa $1, $5
pxor $4, $5
movdqa $1, $6
PNOT($7)
pxor $7, $5
pxor $1, $5
->)
+')
-define(<WSBOX7>, <
+define(`WSBOX7', `
movdqa $1, $5
pand $3, $5
movdqa $2, $8
PNOT($4) C t02
por $4, $5
pxor $3, $5
->)
+')
C WLT(x0, x1, x2, x3)
-define(<WLT>, <
+define(`WLT', `
WROL(13, $1)
WROL(3, $3)
pxor $1, $2
pxor $3, $2
movdqa $1, T0
- pslld <$>3, T0
+ pslld `$'3, T0
pxor $3, $4
pxor T0, $4
WROL(1, $2)
pxor $2, $1
pxor $4, $1
movdqa $2, T0
- pslld <$>7, T0
+ pslld `$'7, T0
pxor $4, $3
pxor T0, $3
WROL(5, $1)
WROL(22, $3)
->)
+')
.file "serpent-encrypt.asm"
C x86_64/serpent.m4
-ifelse(<
+ifelse(`
Copyright (C) 2011 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C WROL(count, w)
-define(<WROL>, <
+C Rotates each 32-bit element of w left by count bits, as the OR of a
+C left shift by count and a right shift by 32 - count. Clobbers T0.
+define(`WROL', `
movdqa $2, T0
- pslld <$>$1, $2
- psrld <$>eval(32 - $1), T0
+ pslld `$'$1, $2
+ psrld `$'eval(32 - $1), T0
por T0, $2
->)
+')
C Note: Diagrams use little-endian representation, with least
C significant word to the left.
C x3: | a3 | b3 | c3 | d3 |
C +----+----+----+----+
-define(<WTRANSPOSE>, <
+C Operates on its four arguments in place. Clobbers T0, T1, T2 and T3.
+define(`WTRANSPOSE', `
movdqa $1, T0
punpcklqdq $3, T0 C |a0 a1 c0 c1|
punpckhqdq $3, $1 C |a2 a3 c2 c3|
- pshufd <$>0xd8, T0, T0 C |a0 c0 a1 c1|
- pshufd <$>0xd8, $1, T1 C |a2 c2 a3 c3|
+ pshufd `$'0xd8, T0, T0 C |a0 c0 a1 c1|
+ pshufd `$'0xd8, $1, T1 C |a2 c2 a3 c3|
movdqa $2, T2
- punpcklqdq $4, T2 C |b0 b1 d0 11|
+ punpcklqdq $4, T2 C |b0 b1 d0 d1|
punpckhqdq $4, $2 C |b2 b3 d2 d3|
- pshufd <$>0xd8, T2, T2 C |b0 d0 b1 d1|
- pshufd <$>0xd8, $2, T3 C |b2 d2 b3 d3|
+ pshufd `$'0xd8, T2, T2 C |b0 d0 b1 d1|
+ pshufd `$'0xd8, $2, T3 C |b2 d2 b3 d3|
movdqa T0, $1
punpckldq T2, $1 C |a0 b0 c0 d0|
punpckldq T3, $3 C |a2 b2 c2 d2|
movdqa T1, $4
punpckhdq T3, $4 C |a3 b3 c3 d3|
->)
+')
C FIXME: Arrange 16-byte alignment, so we can use movaps?
-define(<WKEYXOR>, <
+C WKEYXOR(offset, x0, x1, x2, x3)
+C Loads 16 bytes from offset(CTX, CNT), broadcasts 32-bit word i of
+C that value to all four lanes and xors it into xi, for i = 0..3.
+C Clobbers T0, T1 and T2.
+define(`WKEYXOR', `
movups $1(CTX, CNT), T0
- pshufd <$>0x55, T0, T1
- pshufd <$>0xaa, T0, T2
+ pshufd `$'0x55, T0, T1
+ pshufd `$'0xaa, T0, T2
pxor T1, $3
pxor T2, $4
- pshufd <$>0xff, T0, T1
- pshufd <$>0x00, T0, T0
+ pshufd `$'0xff, T0, T1
+ pshufd `$'0x00, T0, T0
pxor T1, $5
pxor T0, $2
->)
+')
C x86_64/sha1-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2004, 2008, 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Register usage. KVALUE and INPUT share a register.
-define(<SA>,<%eax>)dnl
-define(<SB>,<%r8d>)dnl
-define(<SC>,<%ecx>)dnl
-define(<SD>,<%edx>)dnl
-define(<SE>,<%r9d>)dnl
-define(<DATA>,<%rsp>)dnl
-define(<T1>,<%r10d>)dnl
-define(<T2>,<%r11d>)dnl
-define(<KVALUE>, <%esi>)dnl
+define(`SA',`%eax')dnl
+define(`SB',`%r8d')dnl
+define(`SC',`%ecx')dnl
+define(`SD',`%edx')dnl
+define(`SE',`%r9d')dnl
+define(`DATA',`%rsp')dnl
+define(`T1',`%r10d')dnl
+define(`T2',`%r11d')dnl
+define(`KVALUE', `%esi')dnl
C Arguments
-define(<STATE>,<%rdi>)dnl
-define(<INPUT>,<%rsi>)dnl
+define(`STATE',`%rdi')dnl
+define(`INPUT',`%rsi')dnl
C Constants
-define(<K1VALUE>, <<$>0x5A827999>)dnl C Rounds 0-19
-define(<K2VALUE>, <<$>0x6ED9EBA1>)dnl C Rounds 20-39
-define(<K3VALUE>, <<$>0x8F1BBCDC>)dnl C Rounds 40-59
-define(<K4VALUE>, <<$>0xCA62C1D6>)dnl C Rounds 60-79
+define(`K1VALUE', ``$'0x5A827999')dnl C Rounds 0-19
+define(`K2VALUE', ``$'0x6ED9EBA1')dnl C Rounds 20-39
+define(`K3VALUE', ``$'0x8F1BBCDC')dnl C Rounds 40-59
+define(`K4VALUE', ``$'0xCA62C1D6')dnl C Rounds 60-79
C Reads the input into register, byteswaps it, and stores it in the DATA array.
C SWAP(index, register)
-define(<SWAP>, <
+define(`SWAP', `
movl OFFSET($1)(INPUT), $2
bswap $2
movl $2, OFFSET($1) (DATA)
->)dnl
+')dnl
C The f functions,
C
C b <<<= 30
dnl ROUND_F1(a, b, c, d, e, i)
-define(<ROUND_F1>, <
+define(`ROUND_F1', `
movl OFFSET(eval($6 % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 2) % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 8) % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 13) % 16)) (DATA), T1
- roll <$>1, T1
+ roll `$'1, T1
movl T1, OFFSET(eval($6 % 16)) (DATA)
movl $4, T2
xorl $3, T2
andl $2, T2
xorl $4, T2
- roll <$>30, $2
+ roll `$'30, $2
addl T1, $5
addl KVALUE, $5
movl $1, T1
- roll <$>5, T1
+ roll `$'5, T1
addl T1, $5
addl T2, $5
->)
+')
dnl ROUND_F1_NOEXP(a, b, c, d, e, i)
-define(<ROUND_F1_NOEXP>, <
+define(`ROUND_F1_NOEXP', `
movl $4, T2
xorl $3, T2
movl $1, T1
addl OFFSET($6) (DATA), $5
xorl $4, T2
addl T2, $5
- roll <$>30, $2
- roll <$>5, T1
+ roll `$'30, $2
+ roll `$'5, T1
addl T1, $5
addl KVALUE, $5
->)
+')
dnl ROUND_F2(a, b, c, d, e, i)
-define(<ROUND_F2>, <
+define(`ROUND_F2', `
movl OFFSET(eval($6 % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 2) % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 8) % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 13) % 16)) (DATA), T1
- roll <$>1, T1
+ roll `$'1, T1
movl T1, OFFSET(eval($6 % 16)) (DATA)
movl $4, T2
xorl $3, T2
xorl $2, T2
- roll <$>30, $2
+ roll `$'30, $2
addl T1, $5
addl KVALUE, $5
movl $1, T1
- roll <$>5, T1
+ roll `$'5, T1
addl T1, $5
addl T2, $5
->)
+')
dnl ROUND_F3(a, b, c, d, e, i)
-define(<ROUND_F3>, <
+define(`ROUND_F3', `
movl OFFSET(eval($6 % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 2) % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 8) % 16)) (DATA), T1
xorl OFFSET(eval(($6 + 13) % 16)) (DATA), T1
- roll <$>1, T1
+ roll `$'1, T1
movl T1, OFFSET(eval($6 % 16)) (DATA)
movl $4, T2
andl $3, T2
xorl $3, T1
andl $2, T1
addl T2, $5
- roll <$>30, $2
+ roll `$'30, $2
movl $1, T2
- roll <$>5, T2
+ roll `$'5, T2
addl T1, $5
addl T2, $5
->)
+')
.file "sha1-compress.asm"
C x86_64/sha256-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha256-compress.asm"
-define(<STATE>, <%rdi>)
-define(<INPUT>, <%rsi>)
-define(<K>, <%rdx>)
-define(<SA>, <%eax>)
-define(<SB>, <%ebx>)
-define(<SC>, <%ecx>)
-define(<SD>, <%r8d>)
-define(<SE>, <%r9d>)
-define(<SF>, <%r10d>)
-define(<SG>, <%r11d>)
-define(<SH>, <%r12d>)
-define(<T0>, <%r13d>)
-define(<T1>, <%edi>) C Overlap STATE
-define(<COUNT>, <%r14>)
-define(<W>, <%r15d>)
-
-define(<EXPN>, <
+define(`STATE', `%rdi')
+define(`INPUT', `%rsi')
+define(`K', `%rdx')
+define(`SA', `%eax')
+define(`SB', `%ebx')
+define(`SC', `%ecx')
+define(`SD', `%r8d')
+define(`SE', `%r9d')
+define(`SF', `%r10d')
+define(`SG', `%r11d')
+define(`SH', `%r12d')
+define(`T0', `%r13d')
+define(`T1', `%edi') C Overlap STATE
+define(`COUNT', `%r14')
+define(`W', `%r15d')
+
+define(`EXPN', `
movl OFFSET($1)(%rsp), W
movl OFFSET(eval(($1 + 14) % 16))(%rsp), T0
movl T0, T1
- shrl <$>10, T0
- roll <$>13, T1
+ shrl `$'10, T0
+ roll `$'13, T1
xorl T1, T0
- roll <$>2, T1
+ roll `$'2, T1
xorl T1, T0
addl T0, W
movl OFFSET(eval(($1 + 1) % 16))(%rsp), T0
movl T0, T1
- shrl <$>3, T0
- roll <$>14, T1
+ shrl `$'3, T0
+ roll `$'14, T1
xorl T1, T0
- roll <$>11, T1
+ roll `$'11, T1
xorl T1, T0
addl T0, W
addl OFFSET(eval(($1 + 9) % 16))(%rsp), W
movl W, OFFSET($1)(%rsp)
->)
+')
C ROUND(A,B,C,D,E,F,G,H,K)
C
C Choice (E, F, G) = G^(E&(F^G))
C Majority (A,B,C) = (A&B) + (C&(A^B))
-define(<ROUND>, <
+define(`ROUND', `
movl $5, T0
movl $5, T1
- roll <$>7, T0
- roll <$>21, T1
+ roll `$'7, T0
+ roll `$'21, T1
xorl T0, T1
- roll <$>19, T0
+ roll `$'19, T0
xorl T0, T1
addl W, $8
addl T1, $8
movl $1, T0
movl $1, T1
- roll <$>10, T0
- roll <$>19, T1
+ roll `$'10, T0
+ roll `$'19, T1
xorl T0, T1
- roll <$>20, T0
+ roll `$'20, T0
xorl T0, T1
addl T1, $8
movl $1, T0
addl T0, $8
andl $3, T1
addl T1, $8
->)
+')
-define(<NOEXPN>, <
+define(`NOEXPN', `
movl OFFSET($1)(INPUT, COUNT, 4), W
bswapl W
movl W, OFFSET($1)(%rsp, COUNT, 4)
->)
+')
C void
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
C x86_64/sha3-permute.asm
-ifelse(<
+ifelse(`
Copyright (C) 2012 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<CTX>, <%rdi>) C 25 64-bit values, 200 bytes.
-define(<COUNT>, <%r8>) C Avoid clobbering %rsi, for W64.
+define(`CTX', `%rdi') C 25 64-bit values, 200 bytes.
+define(`COUNT', `%r8') C Avoid clobbering %rsi, for W64.
-define(<A00>, <%rax>)
-define(<A0102>, <%xmm0>)
-define(<A0304>, <%xmm1>)
+define(`A00', `%rax')
+define(`A0102', `%xmm0')
+define(`A0304', `%xmm1')
-define(<A05>, <%rcx>)
-define(<A0607>, <%xmm2>)
-define(<A0809>, <%xmm3>)
+define(`A05', `%rcx')
+define(`A0607', `%xmm2')
+define(`A0809', `%xmm3')
-define(<A10>, <%rdx>)
-define(<A1112>, <%xmm4>)
-define(<A1314>, <%xmm5>)
+define(`A10', `%rdx')
+define(`A1112', `%xmm4')
+define(`A1314', `%xmm5')
-define(<A15>, <%rbp>)
-define(<A1617>, <%xmm6>)
-define(<A1819>, <%xmm7>)
+define(`A15', `%rbp')
+define(`A1617', `%xmm6')
+define(`A1819', `%xmm7')
-define(<A20>, <%r9>)
-define(<A2122>, <%xmm8>)
-define(<A2324>, <%xmm9>)
+define(`A20', `%r9')
+define(`A2122', `%xmm8')
+define(`A2324', `%xmm9')
-define(<C0>, <%r10>)
-define(<C12>, <%xmm10>)
-define(<C34>, <%xmm11>)
+define(`C0', `%r10')
+define(`C12', `%xmm10')
+define(`C34', `%xmm11')
-define(<D0>, <%r11>)
-define(<D12>, <%xmm12>)
-define(<D34>, <%xmm13>)
+define(`D0', `%r11')
+define(`D12', `%xmm12')
+define(`D34', `%xmm13')
C Wide temporaries
-define(<W0>, <%xmm14>)
-define(<W1>, <%xmm15>)
-define(<W2>, <%xmm12>) C Overlap D12
-define(<W3>, <%xmm13>) C Overlap D34
+define(`W0', `%xmm14')
+define(`W1', `%xmm15')
+define(`W2', `%xmm12') C Overlap D12
+define(`W3', `%xmm13') C Overlap D34
-define(<T0>, <%r12>)
-define(<T1>, <%r13>)
-define(<T2>, <%r11>) C Overlap D0
-define(<T3>, <%r10>) C Overlap C0
+define(`T0', `%r12')
+define(`T1', `%r13')
+define(`T2', `%r11') C Overlap D0
+define(`T3', `%r10') C Overlap C0
-define(<RC>, <%r14>)
+define(`RC', `%r14')
-define(<OFFSET>, <ifelse($1,0,,eval(8*$1))>)
-define(<STATE>, <OFFSET($1)(CTX)>)
+define(`OFFSET', `ifelse($1,0,,eval(8*$1))')
+define(`STATE', `OFFSET($1)(CTX)')
-define(<SWAP64>, <pshufd <$>0x4e,>)
+define(`SWAP64', `pshufd `$'0x4e,')
-define(<DIRECT_MOVQ>, <no>)
+define(`DIRECT_MOVQ', `no')
C MOVQ(src, dst), for moves between a general register and an xmm
C register.
-ifelse(DIRECT_MOVQ, yes, <
+ifelse(DIRECT_MOVQ, yes, `
C movq calls that are equal to the corresponding movd,
C where the Apple assembler requires them to be written as movd.
-define(<MOVQ>, <movd $1, $2>)
->, <
+define(`MOVQ', `movd $1, $2')
+', `
C Moving via (cached) memory is generally faster.
-define(<MOVQ>, <
+define(`MOVQ', `
movq $1, (CTX)
movq (CTX), $2
->)>)
+')')
C ROTL64(rot, register, temp)
C Caller needs to or together the result.
-define(<ROTL64>, <
+define(`ROTL64', `
movdqa $2, $3
- psllq <$>$1, $2
- psrlq <$>eval(64-$1), $3
->)
+ psllq `$'$1, $2
+ psrlq `$'eval(64-$1), $3
+')
.file "sha3-permute.asm"
C x86_64/sha512-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha512-compress.asm"
-define(<STATE>, <%rdi>)
-define(<INPUT>, <%rsi>)
-define(<K>, <%rdx>)
-define(<SA>, <%rax>)
-define(<SB>, <%rbx>)
-define(<SC>, <%rcx>)
-define(<SD>, <%r8>)
-define(<SE>, <%r9>)
-define(<SF>, <%r10>)
-define(<SG>, <%r11>)
-define(<SH>, <%r12>)
-define(<T0>, <%r13>)
-define(<T1>, <%rdi>) C Overlap STATE
-define(<COUNT>, <%r14>)
-define(<W>, <%r15>)
-
-define(<EXPN>, <
+define(`STATE', `%rdi')
+define(`INPUT', `%rsi')
+define(`K', `%rdx')
+define(`SA', `%rax')
+define(`SB', `%rbx')
+define(`SC', `%rcx')
+define(`SD', `%r8')
+define(`SE', `%r9')
+define(`SF', `%r10')
+define(`SG', `%r11')
+define(`SH', `%r12')
+define(`T0', `%r13')
+define(`T1', `%rdi') C Overlap STATE
+define(`COUNT', `%r14')
+define(`W', `%r15')
+
+define(`EXPN', `
mov OFFSET64($1)(%rsp), W
mov OFFSET64(eval(($1 + 14) % 16))(%rsp), T0
mov T0, T1
- shr <$>6, T0
- rol <$>3, T1
+ shr `$'6, T0
+ rol `$'3, T1
xor T1, T0
- rol <$>42, T1
+ rol `$'42, T1
xor T1, T0
add T0, W
mov OFFSET64(eval(($1 + 1) % 16))(%rsp), T0
mov T0, T1
- shr <$>7, T0
- rol <$>56, T1
+ shr `$'7, T0
+ rol `$'56, T1
xor T1, T0
- rol <$>7, T1
+ rol `$'7, T1
xor T1, T0
add T0, W
add OFFSET64(eval(($1 + 9) % 16))(%rsp), W
mov W, OFFSET64($1)(%rsp)
->)
+')
C ROUND(A,B,C,D,E,F,G,H,K)
C
C Choice (E, F, G) = G^(E&(F^G))
C Majority (A,B,C) = (A&B) + (C&(A^B))
-define(<ROUND>, <
+define(`ROUND', `
mov $5, T0
mov $5, T1
- rol <$>23, T0
- rol <$>46, T1
+ rol `$'23, T0
+ rol `$'46, T1
xor T0, T1
- rol <$>27, T0
+ rol `$'27, T0
xor T0, T1
add W, $8
add T1, $8
mov $1, T0
mov $1, T1
- rol <$>25, T0
- rol <$>30, T1
+ rol `$'25, T0
+ rol `$'30, T1
xor T0, T1
- rol <$>11, T0
+ rol `$'11, T0
xor T0, T1
add T1, $8
mov $1, T0
add T0, $8
and $3, T1
add T1, $8
->)
+')
-define(<NOEXPN>, <
+define(`NOEXPN', `
mov OFFSET64($1)(INPUT, COUNT, 8), W
bswap W
mov W, OFFSET64($1)(%rsp, COUNT, 8)
->)
+')
C void
C _nettle_sha512_compress(uint64_t *state, const uint8_t *input, const uint64_t *k)
C x86_64/sha_ni/sha1-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
C Register usage.
C Arguments
-define(<STATE>,<%rdi>)dnl
-define(<INPUT>,<%rsi>)dnl
-
-define(<MSG0>,<%xmm0>)
-define(<MSG1>,<%xmm1>)
-define(<MSG2>,<%xmm2>)
-define(<MSG3>,<%xmm3>)
-define(<ABCD>,<%xmm4>)
-define(<E0>,<%xmm5>)
-define(<E1>,<%xmm6>)
-define(<ABCD_ORIG>, <%xmm7>)
-define(<E_ORIG>, <%xmm8>)
-define(<SWAP_MASK>,<%xmm9>)
+define(`STATE',`%rdi')dnl
+define(`INPUT',`%rsi')dnl
+
+define(`MSG0',`%xmm0')
+define(`MSG1',`%xmm1')
+define(`MSG2',`%xmm2')
+define(`MSG3',`%xmm3')
+define(`ABCD',`%xmm4')
+define(`E0',`%xmm5')
+define(`E1',`%xmm6')
+define(`ABCD_ORIG', `%xmm7')
+define(`E_ORIG', `%xmm8')
+define(`SWAP_MASK',`%xmm9')
C QROUND(M0, M1, M2, M3, E0, E1, TYPE)
-define(<QROUND>, <
+define(`QROUND', `
sha1nexte $1, $5
movdqa ABCD, $6
sha1msg2 $1, $2
- sha1rnds4 <$>$7, $5, ABCD
+ sha1rnds4 `$'$7, $5, ABCD
sha1msg1 $1, $4
pxor $1, $3
->)
+')
.file "sha1-compress.asm"
C x86_64/sha_ni/sha256-compress.asm
-ifelse(<
+ifelse(`
Copyright (C) 2018 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
.file "sha256-compress.asm"
-define(<STATE>, <%rdi>)
-define(<INPUT>, <%rsi>)
-define(<K>, <%rdx>)
-
-define(<MSGK>,<%xmm0>) C Implicit operand of sha256rnds2
-define(<MSG0>,<%xmm1>)
-define(<MSG1>,<%xmm2>)
-define(<MSG2>,<%xmm3>)
-define(<MSG3>,<%xmm4>)
-define(<ABEF>,<%xmm5>)
-define(<CDGH>,<%xmm6>)
-define(<ABEF_ORIG>,<%xmm7>)
-define(<CDGH_ORIG>, <%xmm8>)
-define(<SWAP_MASK>,<%xmm9>)
-define(<TMP>, <%xmm9>) C Overlaps SWAP_MASK
+define(`STATE', `%rdi')
+define(`INPUT', `%rsi')
+define(`K', `%rdx')
+
+define(`MSGK',`%xmm0') C Implicit operand of sha256rnds2
+define(`MSG0',`%xmm1')
+define(`MSG1',`%xmm2')
+define(`MSG2',`%xmm3')
+define(`MSG3',`%xmm4')
+define(`ABEF',`%xmm5')
+define(`CDGH',`%xmm6')
+define(`ABEF_ORIG',`%xmm7')
+define(`CDGH_ORIG', `%xmm8')
+define(`SWAP_MASK',`%xmm9')
+define(`TMP', `%xmm9') C Overlaps SWAP_MASK
C QROUND(M0, M1, M2, M3, R)
-define(<QROUND>, <
+define(`QROUND', `
movdqa eval($5*4)(K), MSGK
paddd $1, MSGK
sha256rnds2 ABEF, CDGH
- pshufd <$>0xe, MSGK, MSGK
+ pshufd `$'0xe, MSGK, MSGK
sha256rnds2 CDGH, ABEF
movdqa $1, TMP
- palignr <$>4, $4, TMP
+ palignr `$'4, $4, TMP
paddd TMP, $2
sha256msg2 $1, $2
sha256msg1 $1, $4
- >)
+ ')
C FIXME: Do something more clever, taking the pshufd into account.
C TRANSPOSE(ABCD, EFGH, scratch) --> untouched, ABEF, CDGH
-define(<TRANSPOSE>, <
+define(`TRANSPOSE', `
movdqa $2, $3
punpckhqdq $1, $2
punpcklqdq $1, $3
->)
+')
C void
C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
C x86_64/umac-nh-n.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
-
-define(<OUT>, <%rdi>)
-define(<ITERS>, <%rsi>)
-define(<KEY>, <%rdx>)
-define(<LENGTH>, <%rcx>)
-define(<MSG>, <%r8>)
-
-define(<XM0>, <%xmm0>)
-define(<XM1>, <%xmm1>)
-define(<XM2>, <%xmm2>)
-define(<XM3>, <%xmm3>)
-define(<XK0>, <%xmm4>)
-define(<XK1>, <%xmm5>)
-define(<XK2>, <%xmm6>)
-define(<XK3>, <%xmm7>)
-define(<XT0>, <%xmm8>)
-define(<XT1>, <%xmm9>)
-define(<XT2>, <%xmm10>)
-define(<XT3>, <%xmm11>)
-define(<XY0>, <%xmm12>)
-define(<XY1>, <%xmm13>)
+')
+
+define(`OUT', `%rdi')
+define(`ITERS', `%rsi')
+define(`KEY', `%rdx')
+define(`LENGTH', `%rcx')
+define(`MSG', `%r8')
+
+define(`XM0', `%xmm0')
+define(`XM1', `%xmm1')
+define(`XM2', `%xmm2')
+define(`XM3', `%xmm3')
+define(`XK0', `%xmm4')
+define(`XK1', `%xmm5')
+define(`XK2', `%xmm6')
+define(`XK3', `%xmm7')
+define(`XT0', `%xmm8')
+define(`XT1', `%xmm9')
+define(`XT2', `%xmm10')
+define(`XT3', `%xmm11')
+define(`XY0', `%xmm12')
+define(`XY1', `%xmm13')
C Copy [0,1,2,3] to [1,1,3,3]
-define(<HI2LO>, <pshufd <$>0xf5,>)
+define(`HI2LO', `pshufd `$'0xf5,')
C FIXME: Would be nice if we could force the key array to be 16-byte
C aligned.
C x86_64/umac-nh.asm
-ifelse(<
+ifelse(`
Copyright (C) 2013 Niels Möller
This file is part of GNU Nettle.
You should have received copies of the GNU General Public License and
the GNU Lesser General Public License along with this program. If
not, see http://www.gnu.org/licenses/.
->)
+')
-define(<KEY>, <%rdi>)
-define(<LENGTH>, <%rsi>)
-define(<MSG>, <%rdx>)
+define(`KEY', `%rdi')
+define(`LENGTH', `%rsi')
+define(`MSG', `%rdx')
-define(<XA>, <%xmm0>)
-define(<XB>, <%xmm1>)
-define(<XK0>, <%xmm2>)
-define(<XK1>, <%xmm3>)
-define(<XY>, <%xmm4>)
-define(<XT0>, <%xmm5>)
-define(<XT1>, <%xmm6>)
+define(`XA', `%xmm0')
+define(`XB', `%xmm1')
+define(`XK0', `%xmm2')
+define(`XK1', `%xmm3')
+define(`XY', `%xmm4')
+define(`XT0', `%xmm5')
+define(`XT1', `%xmm6')
C FIXME: Would be nice if we could force the key array to be 16-byte
C aligned.