C AES state, use two of them
+C SA-SD are the state words and TA-TC are temporaries, all 32-bit
+C registers. After this change SA-SD are %eax, %ebx, %ecx and %edx,
+C which are the registers the HREG and MOVE_HREG macros special-case.
define(<SA>,<%eax>)
define(<SB>,<%ebx>)
-define(<SC>,<%ebp>)
-define(<SD>,<%r9d>)
+define(<SC>,<%ecx>)
+define(<SD>,<%edx>)
define(<TA>,<%r10d>)
define(<TB>,<%r11d>)
define(<TC>,<%r12d>)
-define(<TD>,<%r13d>)
+C There is no TD any more: the rounds that used TD now use SD instead.
define(<CTX>, <%rdi>)
define(<TABLE>, <%rsi>)
-define(<LENGTH>,<%edx>) C Length is only 32 bits
-define(<DST>, <%rcx>)
+C PARAM_LENGTH and PARAM_DST name the same registers as SD (%edx) and
+C SC (%ecx/%rcx). The function copies them to BLOCK_COUNT and DST
+C before the block loop; they must not be read after SC or SD has
+C been written.
+define(<PARAM_LENGTH>,<%edx>) C Length is only 32 bits
+define(<PARAM_DST>, <%rcx>)
define(<SRC>, <%r8>)
+define(<DST>, <%r9>)
define(<KEY>,<%r14>)
define(<COUNT>, <%r15d>)
+C BLOCK_COUNT is the block loop counter; it takes over from the
+C FRAME_COUNT stack slot, so no stack frame is allocated for it.
+define(<BLOCK_COUNT>, <%r13d>)
-C Put the outer loop counter on the stack, and reuse the LENGTH
-C register as a temporary.
-
-define(<FRAME_COUNT>, <(%rsp)>)
-define(<TMP>,<%rdx>)
+C TMP moves from %rdx to %rbp, since %edx is now SD.
+define(<TMP>,<%rbp>)
.file "aes-decrypt-internal.asm"
.text
ALIGN(4)
PROLOGUE(_nettle_aes_decrypt)
- test LENGTH, LENGTH
+ test PARAM_LENGTH, PARAM_LENGTH
jz .Lend
C save all registers that need to be saved
push %r14
push %r15
- C Allocates 4 bytes more than we need, for nicer alignment.
- sub $8, %rsp
-
- shrl $4, LENGTH
- movl LENGTH, FRAME_COUNT
+ mov PARAM_DST, DST
+ movl PARAM_LENGTH, BLOCK_COUNT
+ shrl $4, BLOCK_COUNT
.Lblock_loop:
mov CTX,KEY
AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
xorl 8(KEY),TC
- AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
- xorl 12(KEY),TD
+ AES_ROUND(TABLE, SD,SC,SB,SA, SD, TMP)
+ xorl 12(KEY),SD
- AES_ROUND(TABLE, TA,TD,TC,TB, SA, TMP)
+ AES_ROUND(TABLE, TA,SD,TC,TB, SA, TMP)
xorl 16(KEY), SA
- AES_ROUND(TABLE, TB,TA,TD,TC, SB, TMP)
+ AES_ROUND(TABLE, TB,TA,SD,TC, SB, TMP)
xorl 20(KEY),SB
- AES_ROUND(TABLE, TC,TB,TA,TD, SC, TMP)
+ AES_ROUND(TABLE, TC,TB,TA,SD, SC, TMP)
xorl 24(KEY),SC
- AES_ROUND(TABLE, TD,TC,TB,TA, SD, TMP)
+ AES_ROUND(TABLE, SD,TC,TB,TA, SD, TMP)
xorl 28(KEY),SD
add $32,KEY C point to next key
AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
xorl 8(KEY),TC
- AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
- xorl 12(KEY),TD
+ AES_ROUND(TABLE, SD,SC,SB,SA, SD, TMP)
+ xorl 12(KEY),SD
- AES_FINAL_ROUND(TA,TD,TC,TB, TABLE, SA, TMP)
- AES_FINAL_ROUND(TB,TA,TD,TC, TABLE, SB, TMP)
- AES_FINAL_ROUND(TC,TB,TA,TD, TABLE, SC, TMP)
- AES_FINAL_ROUND(TD,TC,TB,TA, TABLE, SD, TMP)
+ AES_FINAL_ROUND(TA,SD,TC,TB, TABLE, SA, TMP)
+ AES_FINAL_ROUND(TB,TA,SD,TC, TABLE, SB, TMP)
+ AES_FINAL_ROUND(TC,TB,TA,SD, TABLE, SC, TMP)
+ AES_FINAL_ROUND(SD,TC,TB,TA, TABLE, SD, TMP)
C Inverse S-box substitution
mov $3, COUNT
AES_STORE(SA,SB,SC,SD, KEY, DST)
add $16, DST
- decl FRAME_COUNT
+ decl BLOCK_COUNT
jnz .Lblock_loop
- add $8, %rsp
pop %r15
pop %r14
pop %r13
-C -*- mode: asm; asm-comment-char: ?C; -*-
C nettle, low-level cryptographics library
C
C Copyright (C) 2001, 2002, 2005, 2008 Rafael R. Sevilla, Niels Möller
C AES state, use two of them
+C SA-SD are the state words and TA-TC are temporaries, all 32-bit
+C registers. After this change SA-SD are %eax, %ebx, %ecx and %edx,
+C which are the registers the HREG and MOVE_HREG macros special-case.
define(<SA>,<%eax>)
define(<SB>,<%ebx>)
-define(<SC>,<%ebp>)
-define(<SD>,<%r9d>)
+define(<SC>,<%ecx>)
+define(<SD>,<%edx>)
define(<TA>,<%r10d>)
define(<TB>,<%r11d>)
define(<TC>,<%r12d>)
-define(<TD>,<%r13d>)
+C There is no TD any more: the rounds that used TD now use SD instead.
define(<CTX>, <%rdi>)
define(<TABLE>, <%rsi>)
-define(<LENGTH>,<%edx>) C Length is only 32 bits
-define(<DST>, <%rcx>)
+C PARAM_LENGTH and PARAM_DST name the same registers as SD (%edx) and
+C SC (%ecx/%rcx). The function copies them to BLOCK_COUNT and DST
+C before the block loop; they must not be read after SC or SD has
+C been written.
+define(<PARAM_LENGTH>,<%edx>) C Length is only 32 bits
+define(<PARAM_DST>, <%rcx>)
define(<SRC>, <%r8>)
+define(<DST>, <%r9>)
define(<KEY>,<%r14>)
define(<COUNT>, <%r15d>)
+C BLOCK_COUNT is the block loop counter; it takes over from the
+C FRAME_COUNT stack slot, so no stack frame is allocated for it.
+define(<BLOCK_COUNT>, <%r13d>)
-C Put the outer loop counter on the stack, and reuse the LENGTH
-C register as a temporary.
-
-define(<FRAME_COUNT>, <(%rsp)>)
-define(<TMP>,<%rdx>)
+C TMP moves from %rdx to %rbp, since %edx is now SD.
+define(<TMP>,<%rbp>)
.file "aes-encrypt-internal.asm"
.text
ALIGN(4)
PROLOGUE(_nettle_aes_encrypt)
- test LENGTH, LENGTH
+ test PARAM_LENGTH, PARAM_LENGTH
jz .Lend
C save all registers that need to be saved
push %r14
push %r15
- C Allocates 4 bytes more than we need, for nicer alignment.
- sub $8, %rsp
-
- shrl $4, LENGTH
- movl LENGTH, FRAME_COUNT
+ mov PARAM_DST, DST
+ movl PARAM_LENGTH, BLOCK_COUNT
+ shrl $4, BLOCK_COUNT
.Lblock_loop:
mov CTX,KEY
AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
xorl 8(KEY),TC
- AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
- xorl 12(KEY),TD
+ AES_ROUND(TABLE, SD,SA,SB,SC, SD, TMP)
+ xorl 12(KEY),SD
- AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMP)
+ AES_ROUND(TABLE, TA,TB,TC,SD, SA, TMP)
xorl 16(KEY), SA
- AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMP)
+ AES_ROUND(TABLE, TB,TC,SD,TA, SB, TMP)
xorl 20(KEY),SB
- AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMP)
+ AES_ROUND(TABLE, TC,SD,TA,TB, SC, TMP)
xorl 24(KEY),SC
- AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMP)
+ AES_ROUND(TABLE, SD,TA,TB,TC, SD, TMP)
xorl 28(KEY),SD
add $32,KEY C point to next key
AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
xorl 8(KEY),TC
- AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
- xorl 12(KEY),TD
+ AES_ROUND(TABLE, SD,SA,SB,SC, SD, TMP)
+ xorl 12(KEY),SD
- AES_FINAL_ROUND(TA,TB,TC,TD, TABLE, SA, TMP)
- AES_FINAL_ROUND(TB,TC,TD,TA, TABLE, SB, TMP)
- AES_FINAL_ROUND(TC,TD,TA,TB, TABLE, SC, TMP)
- AES_FINAL_ROUND(TD,TA,TB,TC, TABLE, SD, TMP)
+ AES_FINAL_ROUND(TA,TB,TC,SD, TABLE, SA, TMP)
+ AES_FINAL_ROUND(TB,TC,SD,TA, TABLE, SB, TMP)
+ AES_FINAL_ROUND(TC,SD,TA,TB, TABLE, SC, TMP)
+ AES_FINAL_ROUND(SD,TA,TB,TC, TABLE, SD, TMP)
C S-box substitution
mov $3, COUNT
AES_STORE(SA,SB,SC,SD, KEY, DST)
add $16, DST
- decl FRAME_COUNT
+ decl BLOCK_COUNT
jnz .Lblock_loop
- add $8, %rsp
pop %r15
pop %r14
pop %r13
$1, %r14d, %r14b,
$1, %r15d, %r15b)>)dnl
+dnl HREG(reg32)
+dnl Expands to the 8-bit register that names bits 8-15 of reg32:
+dnl %eax -> %ah, %ebx -> %bh, %ecx -> %ch, %edx -> %dh.
+dnl Any other argument falls through to the final "error" word.
+dnl NOTE(review): presumably that is meant to make assembly fail for
+dnl registers that have no high-byte form -- confirm "error" is not
+dnl defined as a macro elsewhere.
+define(<HREG>,<ifelse(
+ $1, %eax, %ah,
+ $1, %ebx, %bh,
+ $1, %ecx, %ch,
+ $1, %edx, %dh,
+ error)>)
+
+dnl MOVE_HREG(src, dst)
+dnl Emits code that sets the 32-bit register dst to bits 8-15 of the
+dnl 32-bit register src, zero-extended.
+dnl
+dnl   src is %eax, %ebx, %ecx or %edx:
+dnl       one movzb from the matching high-byte register (%ah..%dh).
+dnl   any other src:
+dnl       movl src, dst / shr 8 / and 0xff, all done on dst.
+dnl
+dnl src is left unchanged in both cases. The fallback sequence
+dnl modifies the flags (shr, and); the movzb form does not.
+dnl
+dnl NOTE(review): %ah..%dh cannot be encoded in an instruction that
+dnl needs a REX prefix, so in the movzb case dst must be one of the
+dnl eight legacy 32-bit registers, not %r8d-%r15d and not a 64-bit
+dnl register. The caller visible here passes XREG of its temporary
+dnl argument; confirm every caller's temporary meets this.
+define(<MOVE_HREG>, <ifelse(
+ $1, %eax, <movzb %ah, $2
+ >,
+ $1, %ebx, <movzb %bh, $2
+ >,
+ $1, %ecx, <movzb %ch, $2
+ >,
+ $1, %edx, <movzb %dh, $2
+ >,
+ <movl $1, $2
+ shr <$>8, $2
+ and <$>0xff, $2
+ >)>)
+
define(<XREG>,<ifelse(
$1, %rax, %eax,
$1, %rbx, %ebx,
$1, %rdi, %edi,
$1, %rbp, %ebp,
$1, %rsp, %esp,
- $1, %r8d, %r8d,
- $1, %r9d, %r9d,
+ $1, %r8, %r8d,
+ $1, %r9, %r9d,
$1, %r10,%r10d,
$1, %r11,%r11d,
$1, %r12,%r12d,
define(<AES_ROUND>, <
movzb LREG($2), $7
movl AES_TABLE0 ($1, $7, 4),$6
- movl $3, XREG($7)
- shr <$>8,$7
- and <$>0xff,$7
+ MOVE_HREG($3, XREG($7))
xorl AES_TABLE1 ($1, $7, 4),$6
movl $4,XREG($7)
shr <$>16,$7