From: Niels Möller
Date: Tue, 14 Jun 2011 20:52:54 +0000 (+0200)
Subject: * x86_64/serpent-encrypt.asm: Slight simplification of loop logic.
X-Git-Tag: nettle_2.2_release_20110711~58
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=623f2f2c3902e5a825b6c5da4a38abd18f0b2b5b;p=thirdparty%2Fnettle.git

* x86_64/serpent-encrypt.asm: Slight simplification of loop logic.

Rev: nettle/x86_64/serpent-encrypt.asm:1.2
---

diff --git a/x86_64/serpent-encrypt.asm b/x86_64/serpent-encrypt.asm
index 9172b7fb..9b7a25b5 100644
--- a/x86_64/serpent-encrypt.asm
+++ b/x86_64/serpent-encrypt.asm
@@ -296,9 +296,6 @@ define(, <
 	.text
 	ALIGN(4)
 PROLOGUE(nettle_serpent_encrypt)
-	test N, N
-	jz .Lend
-
 	C save all registers that need to be saved
 	push %rbx
 	push %rbp
@@ -309,7 +306,11 @@ PROLOGUE(nettle_serpent_encrypt)
 	lea (SRC, N), SRC
 	lea (DST, N), DST
 	neg N
-
+	jz .Lend
+
+	C Point at the final subkey.
+	lea 512(CTX), CTX
+
 C The single-block loop here is slightly slower than the double-block
 C loop in serpent-encrypt.c.
 
@@ -319,8 +320,13 @@ C loop in serpent-encrypt.c.
 	movl 8(SRC, N), x2
 	movl 12(SRC, N), x3
 
-	xor CNT, CNT
+	mov $-512, CNT
+	jmp .Lround_start
+
+	ALIGN(4)
 .Lround_loop:
+	LT(x0,x1,x2,x3)
+.Lround_start:
 	xor (CTX, CNT), x0
 	xor 4(CTX, CNT), x1
 	xor 8(CTX, CNT), x2
@@ -376,13 +382,9 @@ C loop in serpent-encrypt.c.
 	xor 124(CTX, CNT), y3
 	SBOX7(y0,y1,y2,y3, x0,x1,x2,x3)
 	add $128, CNT
-	C FIXME: Offset CTX and CNT, so we can jump out when CNT == 0
-	cmp $512, CNT
-	je .Lfinal_round
-	LT(x0,x1,x2,x3)
-	jmp .Lround_loop
+	jnz .Lround_loop
 
-.Lfinal_round:
+	C Apply final subkey.
 	xor (CTX, CNT), x0
 	xor 4(CTX, CNT), x1
 	xor 8(CTX, CNT), x2
@@ -394,12 +396,11 @@ C loop in serpent-encrypt.c.
 	movl x3, 12(DST, N)
 	add $16, N
 	jnc .Lblock_loop
-
+.Lend:
 	pop %r14
 	pop %r13
 	pop %r12
 	pop %rbp
 	pop %rbx
-.Lend:
 	ret