From: Niels Möller Date: Sun, 2 Oct 2005 20:43:42 +0000 (+0200) Subject: (_nettle_sha1_compress): Do byte order X-Git-Tag: nettle_1.13_release_20051006~29 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=54467c823ecf5bfd5a7047661a5f2d26025d56bf;p=thirdparty%2Fnettle.git (_nettle_sha1_compress): Do byte order conversion, and store the input data on the stack. This leaves one more register free for other uses. Rev: src/nettle/x86/sha1-compress.asm:1.8 --- diff --git a/x86/sha1-compress.asm b/x86/sha1-compress.asm index 00d8d836..cff69643 100644 --- a/x86/sha1-compress.asm +++ b/x86/sha1-compress.asm @@ -23,11 +23,11 @@ define(,<%ebx>) define(,<%ecx>) define(,<%edx>) define(,<%ebp>) -define(,<%esi>) +define(,<%esp>) define(,<%edi>) -C Extra temporary needed by F3. Can we get rid of it? -define(,<(%esp)>) -define(, <4(%esp)>)dnl +define(,<%esi>) + +define(, <64(%esp)>)dnl C Constants define(, <<$>0x5A827999>) C Rounds 0-19 define(, <<$>0x6ED9EBA1>) C Rounds 20-39 @@ -81,6 +81,7 @@ define(, < andl $3, TMP orl TMP2, TMP >)dnl + C The form of one sha1 round is C C a' = e + a <<< 5 + f( b, c, d ) + k + w; @@ -110,9 +111,16 @@ C adding, and then rotating back. roll <$>30, $2 >)dnl +C SWAP(from, to, register) +define(, < + movl $1, $3 + bswap $3 + movl $3, $2 +>)dnl + .file "sha1-compress.asm" - C sha1_compress(uint32_t *state, uint32_t *data) + C sha1_compress(uint32_t *state, uint8_t *data) .text .align 16 @@ -121,24 +129,44 @@ C adding, and then rotating back. C_NAME(_nettle_sha1_compress): C save all registers that need to be saved - pushl %ebx C 20(%esp) - pushl %ebp C 16(%esp) - pushl %esi C 12(%esp) - pushl %edi C 8(%esp) + pushl %ebx C 80(%esp) + pushl %ebp C 76(%esp) + pushl %esi C 72(%esp) + pushl %edi C 68(%esp) - pushl K1VALUE C 4(%esp) - subl $4, %esp C 0(%esp) = TMP + pushl K1VALUE C 64(%esp) + subl $64, %esp C %esp = W + C Load and byteswap data + movl 92(%esp), TMP + + C No scheduling of these instructions, just use a couple of registers, + C and hope the out-of-order unit can keep up. + SWAP( (TMP), (DATA), %eax) + SWAP( 4(TMP), 4(DATA), %ebx) + SWAP( 8(TMP), 8(DATA), %ecx) + SWAP(12(TMP), 12(DATA), %edx) + SWAP(16(TMP), 16(DATA), %eax) + SWAP(20(TMP), 20(DATA), %ebx) + SWAP(24(TMP), 24(DATA), %ecx) + SWAP(28(TMP), 28(DATA), %edx) + SWAP(32(TMP), 32(DATA), %eax) + SWAP(36(TMP), 36(DATA), %ebx) + SWAP(40(TMP), 40(DATA), %ecx) + SWAP(44(TMP), 44(DATA), %edx) + SWAP(48(TMP), 48(DATA), %eax) + SWAP(52(TMP), 52(DATA), %ebx) + SWAP(56(TMP), 56(DATA), %ecx) + SWAP(60(TMP), 60(DATA), %edx) + C load the state vector - movl 28(%esp),TMP + movl 88(%esp),TMP movl (TMP), SA movl 4(TMP), SB movl 8(TMP), SC movl 12(TMP), SD movl 16(TMP), SE - movl 32(%esp), DATA - ROUND(SA, SB, SC, SD, SE, , NOEXPAND( 0)) ROUND(SE, SA, SB, SC, SD, , NOEXPAND( 1)) ROUND(SD, SE, SA, SB, SC, , NOEXPAND( 2)) @@ -239,14 +267,14 @@ C_NAME(_nettle_sha1_compress): EXPAND(79) ROUND(SB, SC, SD, SE, SA, , TMP) C Update the state vector - movl 28(%esp),TMP + movl 88(%esp),TMP addl SA, (TMP) addl SB, 4(TMP) addl SC, 8(TMP) addl SD, 12(TMP) addl SE, 16(TMP) - addl $8, %esp + addl $68, %esp popl %edi popl %esi popl %ebp