From: Niels Möller
Date: Fri, 6 Mar 2009 11:28:39 +0000 (+0100)
Subject: * x86_64/aes-decrypt-internal.asm: Rearrange register allocation.
X-Git-Tag: nettle_2.0_release_20090608~18
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=eb5cf95135aad183ff8739e10a3504ef18f74369;p=thirdparty%2Fnettle.git

* x86_64/aes-decrypt-internal.asm: Rearrange register allocation.
Put SA--SD in %eax--%edx, so the second byte can be accessed as
%ah--%dh. TD is not needed, SD can be reused. Use the register that is
saved for the outer loop counter, getting it off the stack.
* x86_64/aes-encrypt-internal.asm: Likewise.

* x86_64/aes.m4 (HREG, MOVE_HREG): New macros.
(XREG): Fixed bug in handling of %r8 and %r9.
(AES_ROUND): Use MOVE_HREG.

Rev: nettle/x86_64/aes-decrypt-internal.asm:1.2
Rev: nettle/x86_64/aes-encrypt-internal.asm:1.9
Rev: nettle/x86_64/aes.m4:1.6
---

diff --git a/x86_64/aes-decrypt-internal.asm b/x86_64/aes-decrypt-internal.asm
index d3a48f26..1e9d700b 100644
--- a/x86_64/aes-decrypt-internal.asm
+++ b/x86_64/aes-decrypt-internal.asm
@@ -24,28 +24,25 @@ C Register usage:
 
 C AES state, use two of them
 define(<SA>,<%eax>)
 define(<SB>,<%ebx>)
-define(<SC>,<%ebp>)
-define(<SD>,<%r9d>)
+define(<SC>,<%ecx>)
+define(<SD>,<%edx>)
 define(<TA>,<%r10d>)
 define(<TB>,<%r11d>)
 define(<TC>,<%r12d>)
-define(<TD>,<%r13d>)
 
 define(<CTX>, <%rdi>)
 define(<TABLE>, <%rsi>)
-define(<LENGTH>,<%edx>)	C Length is only 32 bits
-define(<DST>, <%rcx>)
+define(<PARAM_LENGTH>,<%edx>)	C Length is only 32 bits
+define(<PARAM_DST>, <%rcx>)
 define(<SRC>, <%r8>)
+define(<DST>, <%r9>)
 
 define(<KEY>,<%r14>)
 define(<COUNT>, <%r15d>)
+define(<BLOCK_COUNT>, <%r13d>)
 
-C Put the outer loop counter on the stack, and reuse the LENGTH
-C register as a temporary.
-
-define(<FRAME_COUNT>, <(%rsp)>)
-define(<TMP>,<%rdx>)
+define(<TMP>,<%rbp>)
 
 .file "aes-decrypt-internal.asm"
 
@@ -56,7 +53,7 @@ define(<TMP>,<%rdx>)
 	.text
 	ALIGN(4)
 PROLOGUE(_nettle_aes_decrypt)
-	test	LENGTH, LENGTH
+	test	PARAM_LENGTH, PARAM_LENGTH
 	jz	.Lend
 
 	C save all registers that need to be saved
@@ -67,11 +64,9 @@ PROLOGUE(_nettle_aes_decrypt)
 	push	%r14
 	push	%r15
 
-	C Allocates 4 bytes more than we need, for nicer alignment.
-	sub	$8, %rsp
-
-	shrl	$4, LENGTH
-	movl	LENGTH, FRAME_COUNT
+	mov	PARAM_DST, DST
+	movl	PARAM_LENGTH, BLOCK_COUNT
+	shrl	$4, BLOCK_COUNT
 
 .Lblock_loop:
 	mov	CTX,KEY
 
@@ -95,19 +90,19 @@ PROLOGUE(_nettle_aes_decrypt)
 	AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
 	xorl	8(KEY),TC
 
-	AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
-	xorl	12(KEY),TD
+	AES_ROUND(TABLE, SD,SC,SB,SA, SD, TMP)
+	xorl	12(KEY),SD
 
-	AES_ROUND(TABLE, TA,TD,TC,TB, SA, TMP)
+	AES_ROUND(TABLE, TA,SD,TC,TB, SA, TMP)
 	xorl	16(KEY), SA
 
-	AES_ROUND(TABLE, TB,TA,TD,TC, SB, TMP)
+	AES_ROUND(TABLE, TB,TA,SD,TC, SB, TMP)
 	xorl	20(KEY),SB
 
-	AES_ROUND(TABLE, TC,TB,TA,TD, SC, TMP)
+	AES_ROUND(TABLE, TC,TB,TA,SD, SC, TMP)
 	xorl	24(KEY),SC
 
-	AES_ROUND(TABLE, TD,TC,TB,TA, SD, TMP)
+	AES_ROUND(TABLE, SD,TC,TB,TA, SD, TMP)
 	xorl	28(KEY),SD
 
 	add	$32,KEY	C point to next key
@@ -125,13 +120,13 @@ PROLOGUE(_nettle_aes_decrypt)
 	AES_ROUND(TABLE, SC,SB,SA,SD, TC, TMP)
 	xorl	8(KEY),TC
 
-	AES_ROUND(TABLE, SD,SC,SB,SA, TD, TMP)
-	xorl	12(KEY),TD
+	AES_ROUND(TABLE, SD,SC,SB,SA, SD, TMP)
+	xorl	12(KEY),SD
 
-	AES_FINAL_ROUND(TA,TD,TC,TB, TABLE, SA, TMP)
-	AES_FINAL_ROUND(TB,TA,TD,TC, TABLE, SB, TMP)
-	AES_FINAL_ROUND(TC,TB,TA,TD, TABLE, SC, TMP)
-	AES_FINAL_ROUND(TD,TC,TB,TA, TABLE, SD, TMP)
+	AES_FINAL_ROUND(TA,SD,TC,TB, TABLE, SA, TMP)
+	AES_FINAL_ROUND(TB,TA,SD,TC, TABLE, SB, TMP)
+	AES_FINAL_ROUND(TC,TB,TA,SD, TABLE, SC, TMP)
+	AES_FINAL_ROUND(SD,TC,TB,TA, TABLE, SD, TMP)
 
 	C Inverse S-box substitution
 	mov	$3, COUNT
@@ -145,11 +140,10 @@ PROLOGUE(_nettle_aes_decrypt)
 	AES_STORE(SA,SB,SC,SD, KEY, DST)
 	add	$16, DST
 
-	decl	FRAME_COUNT
+	decl	BLOCK_COUNT
 	jnz	.Lblock_loop
 
-	add	$8, %rsp
 	pop	%r15
 	pop	%r14
 	pop	%r13
diff --git a/x86_64/aes-encrypt-internal.asm b/x86_64/aes-encrypt-internal.asm
index e89dc2ba..839d3341 100644
--- a/x86_64/aes-encrypt-internal.asm
+++ b/x86_64/aes-encrypt-internal.asm
@@ -1,4 +1,3 @@
-C -*- mode: asm; asm-comment-char: ?C; -*-
 C nettle, low-level cryptographics library
 C
 C Copyright (C) 2001, 2002, 2005, 2008 Rafael R. Sevilla, Niels Möller
@@ -25,28 +24,25 @@ C Register usage:
 
 C AES state, use two of them
 define(<SA>,<%eax>)
 define(<SB>,<%ebx>)
-define(<SC>,<%ebp>)
-define(<SD>,<%r9d>)
+define(<SC>,<%ecx>)
+define(<SD>,<%edx>)
 define(<TA>,<%r10d>)
 define(<TB>,<%r11d>)
 define(<TC>,<%r12d>)
-define(<TD>,<%r13d>)
 
 define(<CTX>, <%rdi>)
 define(<TABLE>, <%rsi>)
-define(<LENGTH>,<%edx>)	C Length is only 32 bits
-define(<DST>, <%rcx>)
+define(<PARAM_LENGTH>,<%edx>)	C Length is only 32 bits
+define(<PARAM_DST>, <%rcx>)
 define(<SRC>, <%r8>)
+define(<DST>, <%r9>)
 
 define(<KEY>,<%r14>)
 define(<COUNT>, <%r15d>)
+define(<BLOCK_COUNT>, <%r13d>)
 
-C Put the outer loop counter on the stack, and reuse the LENGTH
-C register as a temporary.
-
-define(<FRAME_COUNT>, <(%rsp)>)
-define(<TMP>,<%rdx>)
+define(<TMP>,<%rbp>)
 
 .file "aes-encrypt-internal.asm"
 
@@ -57,7 +53,7 @@ define(<TMP>,<%rdx>)
 	.text
 	ALIGN(4)
 PROLOGUE(_nettle_aes_encrypt)
-	test	LENGTH, LENGTH
+	test	PARAM_LENGTH, PARAM_LENGTH
 	jz	.Lend
 
 	C save all registers that need to be saved
@@ -68,11 +64,9 @@ PROLOGUE(_nettle_aes_encrypt)
 	push	%r14
 	push	%r15
 
-	C Allocates 4 bytes more than we need, for nicer alignment.
-	sub	$8, %rsp
-
-	shrl	$4, LENGTH
-	movl	LENGTH, FRAME_COUNT
+	mov	PARAM_DST, DST
+	movl	PARAM_LENGTH, BLOCK_COUNT
+	shrl	$4, BLOCK_COUNT
 
 .Lblock_loop:
 	mov	CTX,KEY
 
@@ -96,19 +90,19 @@ PROLOGUE(_nettle_aes_encrypt)
 	AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
 	xorl	8(KEY),TC
 
-	AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
-	xorl	12(KEY),TD
+	AES_ROUND(TABLE, SD,SA,SB,SC, SD, TMP)
+	xorl	12(KEY),SD
 
-	AES_ROUND(TABLE, TA,TB,TC,TD, SA, TMP)
+	AES_ROUND(TABLE, TA,TB,TC,SD, SA, TMP)
 	xorl	16(KEY), SA
 
-	AES_ROUND(TABLE, TB,TC,TD,TA, SB, TMP)
+	AES_ROUND(TABLE, TB,TC,SD,TA, SB, TMP)
 	xorl	20(KEY),SB
 
-	AES_ROUND(TABLE, TC,TD,TA,TB, SC, TMP)
+	AES_ROUND(TABLE, TC,SD,TA,TB, SC, TMP)
 	xorl	24(KEY),SC
 
-	AES_ROUND(TABLE, TD,TA,TB,TC, SD, TMP)
+	AES_ROUND(TABLE, SD,TA,TB,TC, SD, TMP)
 	xorl	28(KEY),SD
 
 	add	$32,KEY	C point to next key
@@ -126,13 +120,13 @@ PROLOGUE(_nettle_aes_encrypt)
 	AES_ROUND(TABLE, SC,SD,SA,SB, TC, TMP)
 	xorl	8(KEY),TC
 
-	AES_ROUND(TABLE, SD,SA,SB,SC, TD, TMP)
-	xorl	12(KEY),TD
+	AES_ROUND(TABLE, SD,SA,SB,SC, SD, TMP)
+	xorl	12(KEY),SD
 
-	AES_FINAL_ROUND(TA,TB,TC,TD, TABLE, SA, TMP)
-	AES_FINAL_ROUND(TB,TC,TD,TA, TABLE, SB, TMP)
-	AES_FINAL_ROUND(TC,TD,TA,TB, TABLE, SC, TMP)
-	AES_FINAL_ROUND(TD,TA,TB,TC, TABLE, SD, TMP)
+	AES_FINAL_ROUND(TA,TB,TC,SD, TABLE, SA, TMP)
+	AES_FINAL_ROUND(TB,TC,SD,TA, TABLE, SB, TMP)
+	AES_FINAL_ROUND(TC,SD,TA,TB, TABLE, SC, TMP)
+	AES_FINAL_ROUND(SD,TA,TB,TC, TABLE, SD, TMP)
 
 	C S-box substitution
 	mov	$3, COUNT
@@ -146,11 +140,10 @@ PROLOGUE(_nettle_aes_encrypt)
 	AES_STORE(SA,SB,SC,SD, KEY, DST)
 	add	$16, DST
 
-	decl	FRAME_COUNT
+	decl	BLOCK_COUNT
 	jnz	.Lblock_loop
 
-	add	$8, %rsp
 	pop	%r15
 	pop	%r14
 	pop	%r13
diff --git a/x86_64/aes.m4 b/x86_64/aes.m4
index 26f4b29a..9f251c50 100644
--- a/x86_64/aes.m4
+++ b/x86_64/aes.m4
@@ -17,6 +17,28 @@
 	$1, %r15d, %r15b)>)dnl
+
+define(<HREG>,<ifelse(
+	$1, %eax, %ah,
+	$1, %ebx, %bh,
+	$1, %ecx, %ch,
+	$1, %edx, %dh,
+	error)>)
+
+dnl MOVE_HREG(src, dst)
+define(<MOVE_HREG>, <ifelse(
+	$1, %eax, <movzb	%ah, $2>,
+	$1, %ebx, <movzb	%bh, $2>,
+	$1, %ecx, <movzb	%ch, $2>,
+	$1, %edx, <movzb	%dh, $2>,
+	<movl	$1, $2
+	shr	<$>8, $2
+	and	<$>0xff, $2
+	>)>)
+
 define(<AES_ROUND>, <
 	movzb	LREG($2), $7
 	movl	AES_TABLE0 ($1, $7, 4),$6
-	movl	$3, XREG($7)
-	shr	<$>8,$7
-	and	<$>0xff,$7
+	MOVE_HREG($3, XREG($7))
 	xorl	AES_TABLE1 ($1, $7, 4),$6
 	movl	$4,XREG($7)
 	shr	<$>16,$7
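
The point of MOVE_HREG is easiest to see in isolation. Below is a minimal,
hypothetical demo file, not part of the commit: the macro body is transcribed
from the hunk above, the changequote line is an assumption needed only for
standalone use (the real build establishes the `<' and `>' quote characters
before aes.m4 is read), and move-hreg-demo.m4 is an invented name. Feeding it
to plain m4 prints the single movzb that sources in %eax--%edx now get, versus
the mov/shr/and fallback any other register would need:

dnl move-hreg-demo.m4 -- run with: m4 move-hreg-demo.m4
changequote(<,>)dnl
define(<MOVE_HREG>, <ifelse(
	$1, %eax, <movzb	%ah, $2>,
	$1, %ebx, <movzb	%bh, $2>,
	$1, %ecx, <movzb	%ch, $2>,
	$1, %edx, <movzb	%dh, $2>,
	<movl	$1, $2
	shr	<$>8, $2
	and	<$>0xff, $2
	>)>)dnl
dnl Fast path: %eax has an addressable second byte, %ah.
MOVE_HREG(%eax, %ebp)
dnl Fallback: %r10d has no high-byte alias, so copy, shift, mask.
MOVE_HREG(%r10d, %ebp)

With SA--SD moved into %eax--%edx, every AES_ROUND lookup of a state word's
second byte takes the one-instruction path; the destination %ebp in the demo
matches XREG(TMP) under the new allocation, where TMP is %rbp.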