From: Nikos Mavrogiannopoulos Date: Sat, 21 Dec 2013 08:25:20 +0000 (+0100) Subject: corrected generated files X-Git-Tag: gnutls_3_3_0pre0~423 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7524d0bee8980b984d3da310fc30b3703d9b9331;p=thirdparty%2Fgnutls.git corrected generated files --- diff --git a/lib/accelerated/x86/coff/aes-ssse3-x86.s b/lib/accelerated/x86/coff/aes-ssse3-x86.s index 6894b14b7c..e209b7d0c0 100644 --- a/lib/accelerated/x86/coff/aes-ssse3-x86.s +++ b/lib/accelerated/x86/coff/aes-ssse3-x86.s @@ -84,33 +84,33 @@ __vpaes_encrypt_core: movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 - pand %xmm6,%xmm0 movdqu (%edx),%xmm5 + psrld $4,%xmm1 + pand %xmm6,%xmm0 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 +.byte 102,15,56,0,193 pxor %xmm5,%xmm2 - psrld $4,%xmm1 + pxor %xmm2,%xmm0 addl $16,%edx -.byte 102,15,56,0,193 leal 192(%ebp),%ebx - pxor %xmm2,%xmm0 jmp .L000enc_entry .align 16 .L001enc_loop: movdqa 32(%ebp),%xmm4 - movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,226 -.byte 102,15,56,0,195 pxor %xmm5,%xmm4 - movdqa 64(%ebp),%xmm5 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa -64(%ebx,%ecx,1),%xmm1 + movdqa 64(%ebp),%xmm5 .byte 102,15,56,0,234 + movdqa -64(%ebx,%ecx,1),%xmm1 movdqa 80(%ebp),%xmm2 - movdqa (%ebx,%ecx,1),%xmm4 .byte 102,15,56,0,211 - movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 + movdqa %xmm0,%xmm3 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 @@ -119,28 +119,28 @@ __vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx - subl $1,%eax pxor %xmm3,%xmm0 + subl $1,%eax .L000enc_entry: movdqa %xmm6,%xmm1 - movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm5 .byte 102,15,56,0,232 - movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm7,%xmm4 pxor %xmm5,%xmm3 + movdqa %xmm7,%xmm4 .byte 102,15,56,0,224 - movdqa %xmm7,%xmm2 pxor %xmm5,%xmm4 + movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 -.byte 102,15,56,0,220 + movdqa %xmm7,%xmm3 movdqu (%edx),%xmm5 +.byte 102,15,56,0,220 pxor %xmm1,%xmm3 jnz .L001enc_loop movdqa 96(%ebp),%xmm4 @@ -155,8 +155,8 @@ __vpaes_encrypt_core: .def __vpaes_decrypt_core; .scl 3; .type 32; .endef .align 16 __vpaes_decrypt_core: - leal 608(%ebp),%ebx movl 240(%edx),%eax + leal 608(%ebp),%ebx movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 @@ -179,56 +179,56 @@ __vpaes_decrypt_core: .align 16 .L003dec_loop: movdqa -32(%ebx),%xmm4 - movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa -16(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,56,0,197 movdqa (%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 + subl $1,%eax +.byte 102,15,56,0,197 movdqa 32(%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 48(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 movdqa 64(%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 80(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - addl $16,%edx .byte 102,15,58,15,237,12 - pxor %xmm1,%xmm0 - subl $1,%eax .L002dec_entry: movdqa %xmm6,%xmm1 - movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 - pand %xmm6,%xmm0 psrld $4,%xmm1 + pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm2 .byte 102,15,56,0,208 - movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm7,%xmm4 pxor %xmm2,%xmm3 + movdqa %xmm7,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,220 - movdqu (%edx),%xmm0 pxor %xmm1,%xmm3 + movdqu (%edx),%xmm0 jnz .L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 @@ -335,12 +335,12 @@ __vpaes_schedule_core: .def __vpaes_schedule_192_smear; .scl 3; .type 32; .endef .align 16 __vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm1 + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 pshufd $254,%xmm7,%xmm0 - pxor %xmm1,%xmm6 - pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 ret .def __vpaes_schedule_round; .scl 3; .type 32; .endef @@ -601,8 +601,6 @@ _vpaes_cbc_encrypt: movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx - subl $16,%eax - jc .L020cbc_abort leal -56(%esp),%ebx movl 36(%esp),%ebp andl $-16,%ebx @@ -612,17 +610,18 @@ _vpaes_cbc_encrypt: subl %esi,%edi movl %ebx,48(%esp) movl %edi,(%esp) + subl $16,%eax movl %edx,4(%esp) movl %ebp,8(%esp) movl %eax,%edi - leal .L_vpaes_consts+0x30-.L021pic_point,%ebp + leal .L_vpaes_consts+0x30-.L020pic_point,%ebp call __vpaes_preheat -.L021pic_point: +.L020pic_point: cmpl $0,%ecx - je .L022cbc_dec_loop - jmp .L023cbc_enc_loop + je .L021cbc_dec_loop + jmp .L022cbc_enc_loop .align 16 -.L023cbc_enc_loop: +.L022cbc_enc_loop: movdqu (%esi),%xmm0 pxor %xmm1,%xmm0 call __vpaes_encrypt_core @@ -632,10 +631,10 @@ _vpaes_cbc_encrypt: movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi - jnc .L023cbc_enc_loop - jmp .L024cbc_done + jnc .L022cbc_enc_loop + jmp .L023cbc_done .align 16 -.L022cbc_dec_loop: +.L021cbc_dec_loop: movdqu (%esi),%xmm0 movdqa %xmm1,16(%esp) movdqa %xmm0,32(%esp) @@ -647,16 +646,14 @@ _vpaes_cbc_encrypt: movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi - jnc .L022cbc_dec_loop -.L024cbc_done: + jnc .L021cbc_dec_loop +.L023cbc_done: movl 8(%esp),%ebx movl 48(%esp),%esp movdqu %xmm1,(%ebx) -.L020cbc_abort: popl %edi popl %esi popl %ebx popl %ebp ret -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/aes-ssse3-x86_64.s b/lib/accelerated/x86/coff/aes-ssse3-x86_64.s index 71ec4a5f40..1a6cf83232 100644 --- a/lib/accelerated/x86/coff/aes-ssse3-x86_64.s +++ b/lib/accelerated/x86/coff/aes-ssse3-x86_64.s @@ -43,8 +43,8 @@ _vpaes_encrypt_core: movdqa .Lk_ipt+16(%rip),%xmm0 .byte 102,15,56,0,193 pxor %xmm5,%xmm2 - addq $16,%r9 pxor %xmm2,%xmm0 + addq $16,%r9 leaq .Lk_mc_backward(%rip),%r10 jmp .Lenc_entry @@ -52,19 +52,19 @@ _vpaes_encrypt_core: .Lenc_loop: movdqa %xmm13,%xmm4 - movdqa %xmm12,%xmm0 .byte 102,15,56,0,226 -.byte 102,15,56,0,195 pxor %xmm5,%xmm4 - movdqa %xmm15,%xmm5 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm15,%xmm5 .byte 102,15,56,0,234 - movdqa (%r11,%r10,1),%xmm4 + movdqa -64(%r11,%r10,1),%xmm1 movdqa %xmm14,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 .byte 102,15,56,0,193 addq $16,%r9 pxor %xmm2,%xmm0 @@ -73,30 +73,30 @@ _vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andq $48,%r11 - subq $1,%rax pxor %xmm3,%xmm0 + subq $1,%rax .Lenc_entry: movdqa %xmm9,%xmm1 - movdqa %xmm11,%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 .byte 102,15,56,0,232 - movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm10,%xmm4 pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 - movdqa %xmm10,%xmm2 pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 -.byte 102,15,56,0,220 + movdqa %xmm10,%xmm3 movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 pxor %xmm1,%xmm3 jnz .Lenc_loop @@ -149,61 +149,62 @@ _vpaes_decrypt_core: movdqa -32(%r10),%xmm4 - movdqa -16(%r10),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 0(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 16(%r10),%xmm1 + addq $16,%r9 -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 32(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 48(%r10),%xmm1 + subq $1,%rax -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 64(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 80(%r10),%xmm1 -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - addq $16,%r9 + .byte 102,15,58,15,237,12 - pxor %xmm1,%xmm0 - subq $1,%rax .Ldec_entry: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 - movdqa %xmm11,%xmm2 psrld $4,%xmm1 pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 .byte 102,15,56,0,208 - movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm10,%xmm4 pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,220 - movdqu (%r9),%xmm0 pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 jnz .Ldec_loop @@ -211,7 +212,7 @@ _vpaes_decrypt_core: .byte 102,15,56,0,226 pxor %xmm0,%xmm4 movdqa 112(%r10),%xmm0 - movdqa -352(%r11),%xmm2 + movdqa .Lk_sr-.Lk_dsbd(%r11),%xmm2 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 .byte 102,15,56,0,194 @@ -231,7 +232,7 @@ _vpaes_schedule_core: - call _vpaes_preheat + call _vpaes_preheat movdqa .Lk_rcon(%rip),%xmm8 movdqu (%rdi),%xmm0 @@ -277,7 +278,7 @@ _vpaes_schedule_core: call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle jmp .Loop_schedule_128 @@ -298,7 +299,7 @@ _vpaes_schedule_core: .p2align 4 .Lschedule_192: movdqu 8(%rdi),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 @@ -307,13 +308,13 @@ _vpaes_schedule_core: .Loop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_192_smear - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_192_smear jmp .Loop_schedule_192 @@ -330,18 +331,18 @@ _vpaes_schedule_core: .p2align 4 .Lschedule_256: movdqu 16(%rdi),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movl $7,%esi .Loop_schedule_256: - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle movdqa %xmm0,%xmm6 call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle pshufd $255,%xmm0,%xmm0 @@ -379,7 +380,7 @@ _vpaes_schedule_core: .Lschedule_mangle_last_dec: addq $-16,%rdx pxor .Lk_s63(%rip),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movdqu %xmm0,(%rdx) @@ -411,12 +412,12 @@ _vpaes_schedule_core: .def _vpaes_schedule_192_smear; .scl 3; .type 32; .endef .p2align 4 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm1 + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 pshufd $254,%xmm7,%xmm0 - pxor %xmm1,%xmm6 - pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 @@ -826,8 +827,6 @@ vpaes_cbc_encrypt: movq 48(%rsp),%r9 xchgq %rcx,%rdx - subq $16,%rcx - jc .Lcbc_abort leaq -184(%rsp),%rsp movaps %xmm6,16(%rsp) movaps %xmm7,32(%rsp) @@ -842,6 +841,7 @@ vpaes_cbc_encrypt: .Lcbc_body: movdqu (%r8),%xmm6 subq %rdi,%rsi + subq $16,%rcx call _vpaes_preheat cmpl $0,%r9d je .Lcbc_dec_loop @@ -882,7 +882,6 @@ vpaes_cbc_encrypt: movaps 160(%rsp),%xmm15 leaq 184(%rsp),%rsp .Lcbc_epilogue: -.Lcbc_abort: movq 8(%rsp),%rdi movq 16(%rsp),%rsi .byte 0xf3,0xc3 @@ -1046,7 +1045,7 @@ se_handler: leaq 16(%rax),%rsi leaq 512(%r8),%rdi movl $20,%ecx -.long 0xa548f3fc +.long 0xa548f3fc leaq 184(%rax),%rax .Lin_prologue: @@ -1059,7 +1058,7 @@ se_handler: movq 40(%r9),%rdi movq %r8,%rsi movl $154,%ecx -.long 0xa548f3fc +.long 0xa548f3fc movq %r9,%rsi xorq %rcx,%rcx @@ -1116,22 +1115,21 @@ se_handler: .LSEH_info_vpaes_set_encrypt_key: .byte 9,0,0,0 .rva se_handler -.rva .Lenc_key_body,.Lenc_key_epilogue +.rva .Lenc_key_body,.Lenc_key_epilogue .LSEH_info_vpaes_set_decrypt_key: .byte 9,0,0,0 .rva se_handler -.rva .Ldec_key_body,.Ldec_key_epilogue +.rva .Ldec_key_body,.Ldec_key_epilogue .LSEH_info_vpaes_encrypt: .byte 9,0,0,0 .rva se_handler -.rva .Lenc_body,.Lenc_epilogue +.rva .Lenc_body,.Lenc_epilogue .LSEH_info_vpaes_decrypt: .byte 9,0,0,0 .rva se_handler -.rva .Ldec_body,.Ldec_epilogue +.rva .Ldec_body,.Ldec_epilogue .LSEH_info_vpaes_cbc_encrypt: .byte 9,0,0,0 .rva se_handler -.rva .Lcbc_body,.Lcbc_epilogue +.rva .Lcbc_body,.Lcbc_epilogue -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/aesni-x86.s b/lib/accelerated/x86/coff/aesni-x86.s index 9c982a20b5..502be77883 100644 --- a/lib/accelerated/x86/coff/aesni-x86.s +++ b/lib/accelerated/x86/coff/aesni-x86.s @@ -2163,4 +2163,3 @@ _aesni_set_decrypt_key: .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/aesni-x86_64.s b/lib/accelerated/x86/coff/aesni-x86_64.s index f8abd317aa..8751ccba8f 100644 --- a/lib/accelerated/x86/coff/aesni-x86_64.s +++ b/lib/accelerated/x86/coff/aesni-x86_64.s @@ -53,7 +53,7 @@ aesni_encrypt: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz .Loop_enc1_1 + jnz .Loop_enc1_1 .byte 102,15,56,221,209 movups %xmm2,(%rdx) .byte 0xf3,0xc3 @@ -74,7 +74,7 @@ aesni_decrypt: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz .Loop_dec1_2 + jnz .Loop_dec1_2 .byte 102,15,56,223,209 movups %xmm2,(%rdx) .byte 0xf3,0xc3 @@ -593,7 +593,7 @@ aesni_ecb_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_3 + jnz .Loop_enc1_3 .byte 102,15,56,221,209 movups %xmm2,(%rsi) jmp .Lecb_ret @@ -738,7 +738,7 @@ aesni_ecb_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_4 + jnz .Loop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) jmp .Lecb_ret @@ -911,7 +911,7 @@ aesni_ccm64_decrypt_blocks: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_5 + jnz .Loop_enc1_5 .byte 102,15,56,221,209 movups (%rdi),%xmm8 paddq %xmm6,%xmm9 @@ -970,7 +970,7 @@ aesni_ccm64_decrypt_blocks: decl %eax movups (%r11),%xmm1 leaq 16(%r11),%r11 - jnz .Loop_enc1_6 + jnz .Loop_enc1_6 .byte 102,15,56,221,217 movups %xmm3,(%r9) movaps (%rsp),%xmm6 @@ -997,423 +997,211 @@ aesni_ctr32_encrypt_blocks: movq %r9,%rcx movq 40(%rsp),%r8 - leaq (%rsp),%rax - pushq %rbp - subq $288,%rsp - andq $-16,%rsp - movaps %xmm6,-168(%rax) - movaps %xmm7,-152(%rax) - movaps %xmm8,-136(%rax) - movaps %xmm9,-120(%rax) - movaps %xmm10,-104(%rax) - movaps %xmm11,-88(%rax) - movaps %xmm12,-72(%rax) - movaps %xmm13,-56(%rax) - movaps %xmm14,-40(%rax) - movaps %xmm15,-24(%rax) + leaq -200(%rsp),%rsp + movaps %xmm6,32(%rsp) + movaps %xmm7,48(%rsp) + movaps %xmm8,64(%rsp) + movaps %xmm9,80(%rsp) + movaps %xmm10,96(%rsp) + movaps %xmm11,112(%rsp) + movaps %xmm12,128(%rsp) + movaps %xmm13,144(%rsp) + movaps %xmm14,160(%rsp) + movaps %xmm15,176(%rsp) .Lctr32_body: - leaq -8(%rax),%rbp - cmpq $1,%rdx je .Lctr32_one_shortcut - movdqu (%r8),%xmm2 - movdqu (%rcx),%xmm0 - movl 12(%r8),%r8d - pxor %xmm0,%xmm2 - movl 12(%rcx),%r11d - movdqa %xmm2,0(%rsp) - bswapl %r8d - movdqa %xmm2,%xmm3 - movdqa %xmm2,%xmm4 - movdqa %xmm2,%xmm5 - movdqa %xmm2,64(%rsp) - movdqa %xmm2,80(%rsp) - movdqa %xmm2,96(%rsp) - movdqa %xmm2,112(%rsp) + movdqu (%r8),%xmm14 + movdqa .Lbswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 movl 240(%rcx),%eax - - leaq 1(%r8),%r9 - leaq 2(%r8),%r10 - bswapl %r9d - bswapl %r10d - xorl %r11d,%r9d - xorl %r11d,%r10d -.byte 102,65,15,58,34,217,3 - leaq 3(%r8),%r9 - movdqa %xmm3,16(%rsp) -.byte 102,65,15,58,34,226,3 - bswapl %r9d - leaq 4(%r8),%r10 - movdqa %xmm4,32(%rsp) - xorl %r11d,%r9d - bswapl %r10d -.byte 102,65,15,58,34,233,3 - xorl %r11d,%r10d - movdqa %xmm5,48(%rsp) - leaq 5(%r8),%r9 - movl %r10d,64+12(%rsp) - bswapl %r9d - leaq 6(%r8),%r10 - xorl %r11d,%r9d bswapl %r10d - movl %r9d,80+12(%rsp) - xorl %r11d,%r10d - leaq 7(%r8),%r9 - movl %r10d,96+12(%rsp) - bswapl %r9d - xorl %r11d,%r9d - movl %r9d,112+12(%rsp) + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,0(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,16(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb .Lctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp .Lctr32_loop6 - movups 16(%rcx),%xmm1 +.p2align 4 +.Lctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 - movdqa 64(%rsp),%xmm6 - movdqa 80(%rsp),%xmm7 - cmpq $8,%rdx - jb .Lctr32_tail - leaq 128(%rcx),%rcx - subq $8,%rdx - jmp .Lctr32_loop8 -.p2align 5 -.Lctr32_loop8: - addl $8,%r8d - movdqa 96(%rsp),%xmm8 -.byte 102,15,56,220,209 - movl %r8d,%r9d - movdqa 112(%rsp),%xmm9 -.byte 102,15,56,220,217 - bswapl %r9d - movups 32-128(%rcx),%xmm0 -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,0+12(%rsp) - leaq 1(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 48-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,16+12(%rsp) - leaq 2(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 64-128(%rcx),%xmm0 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - bswapl %r9d -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,32+12(%rsp) - leaq 3(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 80-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,48+12(%rsp) - leaq 4(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 96-128(%rcx),%xmm0 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - bswapl %r9d -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,64+12(%rsp) - leaq 5(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 112-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,80+12(%rsp) - leaq 6(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 128-128(%rcx),%xmm0 + pxor %xmm0,%xmm3 .byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 .byte 102,15,56,220,217 - bswapl %r9d + movdqa .Lincrement32(%rip),%xmm13 + pxor %xmm0,%xmm5 .byte 102,15,56,220,225 - xorl %r11d,%r9d + movdqa 0(%rsp),%xmm12 + pxor %xmm0,%xmm6 .byte 102,15,56,220,233 - movl %r9d,96+12(%rsp) - leaq 7(%r8),%r9 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 144-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,112+12(%rsp) -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 - movdqu 0(%rdi),%xmm10 -.byte 102,68,15,56,220,200 - movups 160-128(%rcx),%xmm0 - - cmpl $11,%eax - jb .Lctr32_enc_done - + jmp .Lctr32_enc_loop6_enter +.p2align 4 +.Lctr32_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 + decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 176-128(%rcx),%xmm1 - +.Lctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 + leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 192-128(%rcx),%xmm0 - je .Lctr32_enc_done - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 208-128(%rcx),%xmm1 + movups (%rcx),%xmm0 + jnz .Lctr32_enc_loop6 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 224-128(%rcx),%xmm0 - -.Lctr32_enc_done: - movdqu 16(%rdi),%xmm11 - pxor %xmm0,%xmm10 - movdqu 32(%rdi),%xmm12 - pxor %xmm0,%xmm11 - movdqu 48(%rdi),%xmm13 - pxor %xmm0,%xmm12 - movdqu 64(%rdi),%xmm14 - pxor %xmm0,%xmm13 - movdqu 80(%rdi),%xmm15 - pxor %xmm0,%xmm14 .byte 102,15,56,220,209 - pxor %xmm0,%xmm15 + paddd %xmm13,%xmm12 .byte 102,15,56,220,217 + paddd 16(%rsp),%xmm13 .byte 102,15,56,220,225 + movdqa %xmm12,0(%rsp) .byte 102,15,56,220,233 + movdqa %xmm13,16(%rsp) .byte 102,15,56,220,241 +.byte 102,69,15,56,0,231 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movdqu 96(%rdi),%xmm1 +.byte 102,69,15,56,0,239 -.byte 102,65,15,56,221,210 - pxor %xmm0,%xmm1 - movdqu 112(%rdi),%xmm10 - leaq 128(%rdi),%rdi -.byte 102,65,15,56,221,219 - pxor %xmm0,%xmm10 - movdqa 0(%rsp),%xmm11 -.byte 102,65,15,56,221,228 - movdqa 16(%rsp),%xmm12 -.byte 102,65,15,56,221,237 - movdqa 32(%rsp),%xmm13 -.byte 102,65,15,56,221,246 - movdqa 48(%rsp),%xmm14 -.byte 102,65,15,56,221,255 - movdqa 64(%rsp),%xmm15 -.byte 102,68,15,56,221,193 - movdqa 80(%rsp),%xmm0 -.byte 102,69,15,56,221,202 - movups 16-128(%rcx),%xmm1 - - movups %xmm2,(%rsi) - movdqa %xmm11,%xmm2 - movups %xmm3,16(%rsi) - movdqa %xmm12,%xmm3 - movups %xmm4,32(%rsi) - movdqa %xmm13,%xmm4 - movups %xmm5,48(%rsi) - movdqa %xmm14,%xmm5 - movups %xmm6,64(%rsi) - movdqa %xmm15,%xmm6 - movups %xmm7,80(%rsi) - movdqa %xmm0,%xmm7 - movups %xmm8,96(%rsi) - movups %xmm9,112(%rsi) - leaq 128(%rsi),%rsi +.byte 102,15,56,221,208 + movups (%rdi),%xmm8 +.byte 102,15,56,221,216 + movups 16(%rdi),%xmm9 +.byte 102,15,56,221,224 + movups 32(%rdi),%xmm10 +.byte 102,15,56,221,232 + movups 48(%rdi),%xmm11 +.byte 102,15,56,221,240 + movups 64(%rdi),%xmm1 +.byte 102,15,56,221,248 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi - subq $8,%rdx - jnc .Lctr32_loop8 + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc .Lctr32_loop6 - addq $8,%rdx + addq $6,%rdx jz .Lctr32_done - leaq -128(%rcx),%rcx + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax .Lctr32_tail: - leaq 16(%rcx),%rcx - cmpq $4,%rdx - jb .Lctr32_loop3 - je .Lctr32_loop4 - - movdqa 96(%rsp),%xmm8 - pxor %xmm9,%xmm9 - - movups 16(%rcx),%xmm0 -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 - shrl $1,%eax -.byte 102,15,56,220,225 - decl %eax -.byte 102,15,56,220,233 - movups (%rdi),%xmm10 -.byte 102,15,56,220,241 - movups 16(%rdi),%xmm11 -.byte 102,15,56,220,249 - movups 32(%rdi),%xmm12 -.byte 102,68,15,56,220,193 - movups 16(%rcx),%xmm1 - - call .Lenc_loop8_enter - - movdqu 48(%rdi),%xmm13 - pxor %xmm10,%xmm2 - movdqu 64(%rdi),%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm10,%xmm6 - movdqu %xmm5,48(%rsi) - movdqu %xmm6,64(%rsi) - cmpq $6,%rdx - jb .Lctr32_done - - movups 80(%rdi),%xmm11 - xorps %xmm11,%xmm7 - movups %xmm7,80(%rsi) - je .Lctr32_done + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb .Lctr32_one - movups 96(%rdi),%xmm12 - xorps %xmm12,%xmm8 - movups %xmm8,96(%rsi) - jmp .Lctr32_done + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je .Lctr32_two -.p2align 5 -.Lctr32_loop4: -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - movups (%rcx),%xmm1 - decl %eax - jnz .Lctr32_loop4 -.byte 102,15,56,221,209 - movups (%rdi),%xmm10 -.byte 102,15,56,221,217 - movups 16(%rdi),%xmm11 -.byte 102,15,56,221,225 - movups 32(%rdi),%xmm12 -.byte 102,15,56,221,233 - movups 48(%rdi),%xmm13 - - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - xorps %xmm11,%xmm3 - movups %xmm3,16(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm4,32(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm5,48(%rsi) - jmp .Lctr32_done + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb .Lctr32_three -.p2align 5 -.Lctr32_loop3: -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 - movups (%rcx),%xmm1 - decl %eax - jnz .Lctr32_loop3 -.byte 102,15,56,221,209 -.byte 102,15,56,221,217 -.byte 102,15,56,221,225 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je .Lctr32_four - movups (%rdi),%xmm10 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - cmpq $2,%rdx - jb .Lctr32_done + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 - movups 16(%rdi),%xmm11 - xorps %xmm11,%xmm3 - movups %xmm3,16(%rsi) - je .Lctr32_done + call _aesni_encrypt6 - movups 32(%rdi),%xmm12 - xorps %xmm12,%xmm4 - movups %xmm4,32(%rsi) + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) jmp .Lctr32_done .p2align 4 .Lctr32_one_shortcut: movups (%r8),%xmm2 - movups (%rdi),%xmm10 + movups (%rdi),%xmm8 movl 240(%rcx),%eax +.Lctr32_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -1423,27 +1211,58 @@ aesni_ctr32_encrypt_blocks: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_7 + jnz .Loop_enc1_7 .byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp .Lctr32_done + +.p2align 4 +.Lctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp .Lctr32_done + +.p2align 4 +.Lctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) jmp .Lctr32_done .p2align 4 +.Lctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + .Lctr32_done: - movaps -160(%rbp),%xmm6 - movaps -144(%rbp),%xmm7 - movaps -128(%rbp),%xmm8 - movaps -112(%rbp),%xmm9 - movaps -96(%rbp),%xmm10 - movaps -80(%rbp),%xmm11 - movaps -64(%rbp),%xmm12 - movaps -48(%rbp),%xmm13 - movaps -32(%rbp),%xmm14 - movaps -16(%rbp),%xmm15 - leaq (%rbp),%rsp - popq %rbp -.Lctr32_epilogue: + movaps 32(%rsp),%xmm6 + movaps 48(%rsp),%xmm7 + movaps 64(%rsp),%xmm8 + movaps 80(%rsp),%xmm9 + movaps 96(%rsp),%xmm10 + movaps 112(%rsp),%xmm11 + movaps 128(%rsp),%xmm12 + movaps 144(%rsp),%xmm13 + movaps 160(%rsp),%xmm14 + movaps 176(%rsp),%xmm15 + leaq 200(%rsp),%rsp +.Lctr32_ret: movq 8(%rsp),%rdi movq 16(%rsp),%rsi .byte 0xf3,0xc3 @@ -1463,22 +1282,18 @@ aesni_xts_encrypt: movq 40(%rsp),%r8 movq 48(%rsp),%r9 - leaq (%rsp),%rax - pushq %rbp - subq $272,%rsp - andq $-16,%rsp - movaps %xmm6,-168(%rax) - movaps %xmm7,-152(%rax) - movaps %xmm8,-136(%rax) - movaps %xmm9,-120(%rax) - movaps %xmm10,-104(%rax) - movaps %xmm11,-88(%rax) - movaps %xmm12,-72(%rax) - movaps %xmm13,-56(%rax) - movaps %xmm14,-40(%rax) - movaps %xmm15,-24(%rax) + leaq -264(%rsp),%rsp + movaps %xmm6,96(%rsp) + movaps %xmm7,112(%rsp) + movaps %xmm8,128(%rsp) + movaps %xmm9,144(%rsp) + movaps %xmm10,160(%rsp) + movaps %xmm11,176(%rsp) + movaps %xmm12,192(%rsp) + movaps %xmm13,208(%rsp) + movaps %xmm14,224(%rsp) + movaps %xmm15,240(%rsp) .Lxts_enc_body: - leaq -8(%rax),%rbp movups (%r9),%xmm15 movl 240(%r8),%eax movl 240(%rcx),%r10d @@ -1491,268 +1306,230 @@ aesni_xts_encrypt: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz .Loop_enc1_8 + jnz .Loop_enc1_8 .byte 102,68,15,56,221,249 - movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax - shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx - movups 16(%rcx,%r10,1),%xmm1 - movl %eax,%r10d - movdqa .Lxts_magic(%rip),%xmm8 - pshufd $95,%xmm15,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm10 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm11 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm12 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - subq $96,%rdx jc .Lxts_enc_short shrl $1,%eax - subl $3,%eax - movups 16(%r11),%xmm1 + subl $1,%eax movl %eax,%r10d - leaq .Lxts_magic(%rip),%r8 jmp .Lxts_enc_grandloop -.p2align 5 +.p2align 4 .Lxts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,220,209 + pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,220,217 + pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,220,225 + pxor %xmm12,%xmm4 movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,220,233 - movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 - pxor %xmm9,%xmm10 -.byte 102,15,56,220,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,220,249 - movups 48(%r11),%xmm1 -.byte 102,15,56,220,208 - pxor %xmm9,%xmm12 + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 movdqa %xmm11,16(%rsp) -.byte 102,15,56,220,216 - pxor %xmm9,%xmm13 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 movdqa %xmm12,32(%rsp) -.byte 102,15,56,220,224 - pxor %xmm9,%xmm14 -.byte 102,15,56,220,232 - pxor %xmm9,%xmm8 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax movdqa %xmm14,64(%rsp) -.byte 102,15,56,220,240 - movdqa %xmm8,80(%rsp) -.byte 102,15,56,220,248 - movups 64(%r11),%xmm0 - leaq 64(%r11),%rcx - pshufd $95,%xmm15,%xmm9 - jmp .Lxts_enc_loop6 -.p2align 5 +.byte 102,15,56,220,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,220,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_enc_loop6_enter + +.p2align 4 .Lxts_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 + decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 +.Lxts_enc_loop6_enter: movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - .byte 102,15,56,220,208 .byte 102,15,56,220,216 + leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups (%rcx),%xmm0 - decl %eax jnz .Lxts_enc_loop6 - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 paddq %xmm15,%xmm15 - psrad $31,%xmm14 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 .byte 102,15,56,220,217 - pand %xmm8,%xmm14 - movups (%r11),%xmm10 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,225 + pxor %xmm9,%xmm15 .byte 102,15,56,220,233 - pxor %xmm14,%xmm15 .byte 102,15,56,220,241 - movaps %xmm10,%xmm11 .byte 102,15,56,220,249 movups 16(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 .byte 102,15,56,220,208 - pxor %xmm15,%xmm10 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,224 + pxor %xmm9,%xmm15 .byte 102,15,56,220,232 - pxor %xmm14,%xmm15 .byte 102,15,56,220,240 - movaps %xmm11,%xmm12 .byte 102,15,56,220,248 movups 32(%rcx),%xmm0 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 .byte 102,15,56,220,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,220,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,225 - movdqa %xmm13,48(%rsp) + pxor %xmm9,%xmm15 .byte 102,15,56,220,233 - pxor %xmm14,%xmm15 .byte 102,15,56,220,241 - movaps %xmm12,%xmm13 .byte 102,15,56,220,249 - movups 48(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - pxor %xmm14,%xmm15 -.byte 102,15,56,220,240 - movaps %xmm13,%xmm14 -.byte 102,15,56,220,248 - - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,220,217 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - pxor %xmm0,%xmm15 - movups (%r11),%xmm0 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups 16(%r11),%xmm1 +.byte 102,15,56,221,208 + pand %xmm8,%xmm9 +.byte 102,15,56,221,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,221,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 - pxor %xmm15,%xmm14 - psrad $31,%xmm9 -.byte 102,15,56,221,84,36,0 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 pand %xmm8,%xmm9 -.byte 102,15,56,221,92,36,16 -.byte 102,15,56,221,100,36,32 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 -.byte 102,15,56,221,108,36,48 -.byte 102,15,56,221,116,36,64 -.byte 102,15,56,221,124,36,80 - movl %r10d,%eax + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_enc_grandloop - leal 7(%rax,%rax,1),%eax + leal 3(%rax,%rax,1),%eax movq %r11,%rcx movl %eax,%r10d .Lxts_enc_short: - pxor %xmm0,%xmm10 addq $96,%rdx jz .Lxts_enc_done - pxor %xmm0,%xmm11 cmpq $32,%rdx jb .Lxts_enc_one - pxor %xmm0,%xmm12 je .Lxts_enc_two - pxor %xmm0,%xmm13 cmpq $64,%rdx jb .Lxts_enc_three - pxor %xmm0,%xmm14 je .Lxts_enc_four + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1793,7 +1570,7 @@ aesni_xts_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_9 + jnz .Loop_enc1_9 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1855,15 +1632,15 @@ aesni_xts_encrypt: call _aesni_encrypt4 - pxor %xmm10,%xmm2 - movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_enc_done @@ -1898,24 +1675,23 @@ aesni_xts_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_10 + jnz .Loop_enc1_10 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movups %xmm2,-16(%rsi) .Lxts_enc_ret: - movaps -160(%rbp),%xmm6 - movaps -144(%rbp),%xmm7 - movaps -128(%rbp),%xmm8 - movaps -112(%rbp),%xmm9 - movaps -96(%rbp),%xmm10 - movaps -80(%rbp),%xmm11 - movaps -64(%rbp),%xmm12 - movaps -48(%rbp),%xmm13 - movaps -32(%rbp),%xmm14 - movaps -16(%rbp),%xmm15 - leaq (%rbp),%rsp - popq %rbp + movaps 96(%rsp),%xmm6 + movaps 112(%rsp),%xmm7 + movaps 128(%rsp),%xmm8 + movaps 144(%rsp),%xmm9 + movaps 160(%rsp),%xmm10 + movaps 176(%rsp),%xmm11 + movaps 192(%rsp),%xmm12 + movaps 208(%rsp),%xmm13 + movaps 224(%rsp),%xmm14 + movaps 240(%rsp),%xmm15 + leaq 264(%rsp),%rsp .Lxts_enc_epilogue: movq 8(%rsp),%rdi movq 16(%rsp),%rsi @@ -1936,22 +1712,18 @@ aesni_xts_decrypt: movq 40(%rsp),%r8 movq 48(%rsp),%r9 - leaq (%rsp),%rax - pushq %rbp - subq $272,%rsp - andq $-16,%rsp - movaps %xmm6,-168(%rax) - movaps %xmm7,-152(%rax) - movaps %xmm8,-136(%rax) - movaps %xmm9,-120(%rax) - movaps %xmm10,-104(%rax) - movaps %xmm11,-88(%rax) - movaps %xmm12,-72(%rax) - movaps %xmm13,-56(%rax) - movaps %xmm14,-40(%rax) - movaps %xmm15,-24(%rax) + leaq -264(%rsp),%rsp + movaps %xmm6,96(%rsp) + movaps %xmm7,112(%rsp) + movaps %xmm8,128(%rsp) + movaps %xmm9,144(%rsp) + movaps %xmm10,160(%rsp) + movaps %xmm11,176(%rsp) + movaps %xmm12,192(%rsp) + movaps %xmm13,208(%rsp) + movaps %xmm14,224(%rsp) + movaps %xmm15,240(%rsp) .Lxts_dec_body: - leaq -8(%rax),%rbp movups (%r9),%xmm15 movl 240(%r8),%eax movl 240(%rcx),%r10d @@ -1964,7 +1736,7 @@ aesni_xts_decrypt: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz .Loop_enc1_11 + jnz .Loop_enc1_11 .byte 102,68,15,56,221,249 xorl %eax,%eax testq $15,%rdx @@ -1972,266 +1744,228 @@ aesni_xts_decrypt: shlq $4,%rax subq %rax,%rdx - movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax - shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx - movups 16(%rcx,%r10,1),%xmm1 - movl %eax,%r10d - movdqa .Lxts_magic(%rip),%xmm8 - pshufd $95,%xmm15,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm10 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm11 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm12 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - subq $96,%rdx jc .Lxts_dec_short shrl $1,%eax - subl $3,%eax - movups 16(%r11),%xmm1 + subl $1,%eax movl %eax,%r10d - leaq .Lxts_magic(%rip),%r8 jmp .Lxts_dec_grandloop -.p2align 5 +.p2align 4 .Lxts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,222,209 + pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,222,217 + pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,222,225 + pxor %xmm12,%xmm4 movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,222,233 - movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 - pxor %xmm9,%xmm10 -.byte 102,15,56,222,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,222,249 - movups 48(%r11),%xmm1 -.byte 102,15,56,222,208 - pxor %xmm9,%xmm12 - movdqa %xmm11,16(%rsp) -.byte 102,15,56,222,216 - pxor %xmm9,%xmm13 - movdqa %xmm12,32(%rsp) -.byte 102,15,56,222,224 - pxor %xmm9,%xmm14 -.byte 102,15,56,222,232 - pxor %xmm9,%xmm8 - movdqa %xmm14,64(%rsp) -.byte 102,15,56,222,240 - movdqa %xmm8,80(%rsp) -.byte 102,15,56,222,248 - movups 64(%r11),%xmm0 - leaq 64(%r11),%rcx - pshufd $95,%xmm15,%xmm9 - jmp .Lxts_dec_loop6 -.p2align 5 -.Lxts_dec_loop6: + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) .byte 102,15,56,222,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) .byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) .byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) .byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 + pxor %xmm0,%xmm7 movups (%rcx),%xmm0 decl %eax - jnz .Lxts_dec_loop6 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,222,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_dec_loop6_enter - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 +.p2align 4 +.Lxts_dec_loop6: .byte 102,15,56,222,209 - paddq %xmm15,%xmm15 - psrad $31,%xmm14 .byte 102,15,56,222,217 - pand %xmm8,%xmm14 - movups (%r11),%xmm10 + decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 - pxor %xmm14,%xmm15 .byte 102,15,56,222,241 - movaps %xmm10,%xmm11 .byte 102,15,56,222,249 +.Lxts_dec_loop6_enter: movups 16(%rcx),%xmm1 - - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 .byte 102,15,56,222,208 - pxor %xmm15,%xmm10 - psrad $31,%xmm14 .byte 102,15,56,222,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 - pxor %xmm14,%xmm15 .byte 102,15,56,222,240 - movaps %xmm11,%xmm12 .byte 102,15,56,222,248 - movups 32(%rcx),%xmm0 + movups (%rcx),%xmm0 + jnz .Lxts_dec_loop6 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 .byte 102,15,56,222,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,225 - movdqa %xmm13,48(%rsp) + pxor %xmm9,%xmm15 .byte 102,15,56,222,233 - pxor %xmm14,%xmm15 .byte 102,15,56,222,241 - movaps %xmm12,%xmm13 .byte 102,15,56,222,249 - movups 48(%rcx),%xmm1 + movups 16(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 .byte 102,15,56,222,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,222,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,224 + pxor %xmm9,%xmm15 .byte 102,15,56,222,232 - pxor %xmm14,%xmm15 .byte 102,15,56,222,240 - movaps %xmm13,%xmm14 .byte 102,15,56,222,248 + movups 32(%rcx),%xmm0 - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 .byte 102,15,56,222,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 + pand %xmm8,%xmm9 .byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,225 + pxor %xmm9,%xmm15 .byte 102,15,56,222,233 - pxor %xmm0,%xmm15 - movups (%r11),%xmm0 .byte 102,15,56,222,241 .byte 102,15,56,222,249 - movups 16(%r11),%xmm1 - pxor %xmm15,%xmm14 - psrad $31,%xmm9 -.byte 102,15,56,223,84,36,0 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 paddq %xmm15,%xmm15 +.byte 102,15,56,223,208 pand %xmm8,%xmm9 -.byte 102,15,56,223,92,36,16 -.byte 102,15,56,223,100,36,32 +.byte 102,15,56,223,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,223,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 -.byte 102,15,56,223,108,36,48 -.byte 102,15,56,223,116,36,64 -.byte 102,15,56,223,124,36,80 - movl %r10d,%eax + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_dec_grandloop - leal 7(%rax,%rax,1),%eax + leal 3(%rax,%rax,1),%eax movq %r11,%rcx movl %eax,%r10d .Lxts_dec_short: - pxor %xmm0,%xmm10 - pxor %xmm0,%xmm11 addq $96,%rdx jz .Lxts_dec_done - pxor %xmm0,%xmm12 cmpq $32,%rdx jb .Lxts_dec_one - pxor %xmm0,%xmm13 je .Lxts_dec_two - pxor %xmm0,%xmm14 cmpq $64,%rdx jb .Lxts_dec_three je .Lxts_dec_four + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -2281,7 +2015,7 @@ aesni_xts_decrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_12 + jnz .Loop_dec1_12 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -2324,7 +2058,7 @@ aesni_xts_decrypt: xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 - movdqa %xmm14,%xmm11 + movdqa %xmm15,%xmm11 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -2334,8 +2068,14 @@ aesni_xts_decrypt: .p2align 4 .Lxts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movups (%rdi),%xmm2 + pand %xmm8,%xmm9 movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 @@ -2346,16 +2086,16 @@ aesni_xts_decrypt: call _aesni_decrypt4 - pxor %xmm10,%xmm2 + xorps %xmm10,%xmm2 movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 + xorps %xmm11,%xmm3 movdqa %xmm15,%xmm11 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_dec_done @@ -2379,7 +2119,7 @@ aesni_xts_decrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_13 + jnz .Loop_dec1_13 .byte 102,15,56,223,209 xorps %xmm11,%xmm2 movups %xmm2,(%rsi) @@ -2409,24 +2149,23 @@ aesni_xts_decrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_14 + jnz .Loop_dec1_14 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) .Lxts_dec_ret: - movaps -160(%rbp),%xmm6 - movaps -144(%rbp),%xmm7 - movaps -128(%rbp),%xmm8 - movaps -112(%rbp),%xmm9 - movaps -96(%rbp),%xmm10 - movaps -80(%rbp),%xmm11 - movaps -64(%rbp),%xmm12 - movaps -48(%rbp),%xmm13 - movaps -32(%rbp),%xmm14 - movaps -16(%rbp),%xmm15 - leaq (%rbp),%rsp - popq %rbp + movaps 96(%rsp),%xmm6 + movaps 112(%rsp),%xmm7 + movaps 128(%rsp),%xmm8 + movaps 144(%rsp),%xmm9 + movaps 160(%rsp),%xmm10 + movaps 176(%rsp),%xmm11 + movaps 192(%rsp),%xmm12 + movaps 208(%rsp),%xmm13 + movaps 224(%rsp),%xmm14 + movaps 240(%rsp),%xmm15 + leaq 264(%rsp),%rsp .Lxts_dec_epilogue: movq 8(%rsp),%rdi movq 16(%rsp),%rsi @@ -2467,374 +2206,194 @@ aesni_cbc_encrypt: leaq 16(%rdi),%rdi movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - xorps %xmm0,%xmm3 - leaq 32(%rcx),%rcx - xorps %xmm3,%xmm2 -.Loop_enc1_15: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_enc1_15 -.byte 102,15,56,221,209 - movl %r10d,%eax - movq %r11,%rcx - movups %xmm2,0(%rsi) - leaq 16(%rsi),%rsi - subq $16,%rdx - jnc .Lcbc_enc_loop - addq $16,%rdx - jnz .Lcbc_enc_tail - movups %xmm2,(%r8) - jmp .Lcbc_ret - -.Lcbc_enc_tail: - movq %rdx,%rcx - xchgq %rdi,%rsi -.long 0x9066A4F3 - movl $16,%ecx - subq %rdx,%rcx - xorl %eax,%eax -.long 0x9066AAF3 - leaq -16(%rdi),%rdi - movl %r10d,%eax - movq %rdi,%rsi - movq %r11,%rcx - xorq %rdx,%rdx - jmp .Lcbc_enc_loop - -.p2align 4 -.Lcbc_decrypt: - leaq (%rsp),%rax - pushq %rbp - subq $176,%rsp - andq $-16,%rsp - movaps %xmm6,16(%rsp) - movaps %xmm7,32(%rsp) - movaps %xmm8,48(%rsp) - movaps %xmm9,64(%rsp) - movaps %xmm10,80(%rsp) - movaps %xmm11,96(%rsp) - movaps %xmm12,112(%rsp) - movaps %xmm13,128(%rsp) - movaps %xmm14,144(%rsp) - movaps %xmm15,160(%rsp) -.Lcbc_decrypt_body: - leaq -8(%rax),%rbp - movups (%r8),%xmm10 - movl %r10d,%eax - cmpq $80,%rdx - jbe .Lcbc_dec_tail - - movups (%rcx),%xmm0 - movdqu 0(%rdi),%xmm2 - movdqu 16(%rdi),%xmm3 - movdqa %xmm2,%xmm11 - movdqu 32(%rdi),%xmm4 - movdqa %xmm3,%xmm12 - movdqu 48(%rdi),%xmm5 - movdqa %xmm4,%xmm13 - movdqu 64(%rdi),%xmm6 - movdqa %xmm5,%xmm14 - movdqu 80(%rdi),%xmm7 - movdqa %xmm6,%xmm15 - cmpq $112,%rdx - jbe .Lcbc_dec_six_or_seven - - subq $112,%rdx - leaq 112(%rcx),%rcx - jmp .Lcbc_dec_loop8_enter -.p2align 4 -.Lcbc_dec_loop8: - movups %xmm9,(%rsi) - leaq 16(%rsi),%rsi -.Lcbc_dec_loop8_enter: - movdqu 96(%rdi),%xmm8 - pxor %xmm0,%xmm2 - movdqu 112(%rdi),%xmm9 - pxor %xmm0,%xmm3 - movups 16-112(%rcx),%xmm1 - pxor %xmm0,%xmm4 - xorq %r11,%r11 - cmpq $112,%rdx - pxor %xmm0,%xmm5 - pxor %xmm0,%xmm6 - pxor %xmm0,%xmm7 - pxor %xmm0,%xmm8 - -.byte 102,15,56,222,209 - pxor %xmm0,%xmm9 - movups 32-112(%rcx),%xmm0 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - setnc %r11b -.byte 102,68,15,56,222,193 - shlq $7,%r11 -.byte 102,68,15,56,222,201 - addq %rdi,%r11 - movups 48-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 64-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 80-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 96-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 112-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 128-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 144-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 160-112(%rcx),%xmm0 - cmpl $11,%eax - jb .Lcbc_dec_done -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 176-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 192-112(%rcx),%xmm0 - je .Lcbc_dec_done -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 208-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 224-112(%rcx),%xmm0 -.Lcbc_dec_done: + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + movups %xmm2,(%r8) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.p2align 4 +.Lcbc_decrypt: + leaq -88(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,16(%rsp) + movaps %xmm8,32(%rsp) + movaps %xmm9,48(%rsp) +.Lcbc_decrypt_body: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe .Lcbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,64(%rsp) + jmp .Lcbc_dec_loop8_enter +.p2align 4 +.Lcbc_dec_loop8: + movaps %xmm0,64(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 .byte 102,15,56,222,209 - pxor %xmm0,%xmm10 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 .byte 102,15,56,222,217 - pxor %xmm0,%xmm11 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 .byte 102,15,56,222,225 - pxor %xmm0,%xmm12 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 .byte 102,15,56,222,233 - pxor %xmm0,%xmm13 + pxor %xmm0,%xmm7 + decl %eax .byte 102,15,56,222,241 - pxor %xmm0,%xmm14 + pxor %xmm0,%xmm8 .byte 102,15,56,222,249 - pxor %xmm0,%xmm15 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movdqu 80(%rdi),%xmm1 - -.byte 102,65,15,56,223,210 - movdqu 96(%rdi),%xmm10 - pxor %xmm0,%xmm1 -.byte 102,65,15,56,223,219 - pxor %xmm0,%xmm10 - movdqu 112(%rdi),%xmm0 - leaq 128(%rdi),%rdi -.byte 102,65,15,56,223,228 - movdqu 0(%r11),%xmm11 -.byte 102,65,15,56,223,237 - movdqu 16(%r11),%xmm12 -.byte 102,65,15,56,223,246 - movdqu 32(%r11),%xmm13 -.byte 102,65,15,56,223,255 - movdqu 48(%r11),%xmm14 -.byte 102,68,15,56,223,193 - movdqu 64(%r11),%xmm15 -.byte 102,69,15,56,223,202 - movdqa %xmm0,%xmm10 - movdqu 80(%r11),%xmm1 - movups -112(%rcx),%xmm0 + movups 16(%rcx),%xmm1 + call .Ldec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps 64(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 movups %xmm2,(%rsi) - movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) - movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) - movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) - movdqa %xmm14,%xmm5 + movl %r10d,%eax movups %xmm6,64(%rsi) - movdqa %xmm15,%xmm6 + movq %r11,%rcx movups %xmm7,80(%rsi) - movdqa %xmm1,%xmm7 + leaq 128(%rdi),%rdi movups %xmm8,96(%rsi) leaq 112(%rsi),%rsi - subq $128,%rdx ja .Lcbc_dec_loop8 movaps %xmm9,%xmm2 - leaq -112(%rcx),%rcx + movaps %xmm0,%xmm9 addq $112,%rdx jle .Lcbc_dec_tail_collected - movups %xmm9,(%rsi) + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax leaq 16(%rsi),%rsi - cmpq $80,%rdx - jbe .Lcbc_dec_tail - - movaps %xmm11,%xmm2 -.Lcbc_dec_six_or_seven: - cmpq $96,%rdx - ja .Lcbc_dec_seven - - movaps %xmm7,%xmm8 - call _aesni_decrypt6 - pxor %xmm10,%xmm2 - movaps %xmm8,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - pxor %xmm15,%xmm7 - movdqu %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - movdqa %xmm7,%xmm2 - jmp .Lcbc_dec_tail_collected - -.p2align 4 -.Lcbc_dec_seven: - movups 96(%rdi),%xmm8 - xorps %xmm9,%xmm9 - call _aesni_decrypt8 - movups 80(%rdi),%xmm9 - pxor %xmm10,%xmm2 - movups 96(%rdi),%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - pxor %xmm15,%xmm7 - movdqu %xmm6,64(%rsi) - pxor %xmm9,%xmm8 - movdqu %xmm7,80(%rsi) - leaq 96(%rsi),%rsi - movdqa %xmm8,%xmm2 - jmp .Lcbc_dec_tail_collected - .Lcbc_dec_tail: movups (%rdi),%xmm2 - subq $16,%rdx + movaps %xmm2,%xmm8 + cmpq $16,%rdx jbe .Lcbc_dec_one movups 16(%rdi),%xmm3 - movaps %xmm2,%xmm11 - subq $16,%rdx + movaps %xmm3,%xmm7 + cmpq $32,%rdx jbe .Lcbc_dec_two movups 32(%rdi),%xmm4 - movaps %xmm3,%xmm12 - subq $16,%rdx + movaps %xmm4,%xmm6 + cmpq $48,%rdx jbe .Lcbc_dec_three movups 48(%rdi),%xmm5 - movaps %xmm4,%xmm13 - subq $16,%rdx + cmpq $64,%rdx jbe .Lcbc_dec_four movups 64(%rdi),%xmm6 - movaps %xmm5,%xmm14 - movaps %xmm6,%xmm15 - xorps %xmm7,%xmm7 - call _aesni_decrypt6 - pxor %xmm10,%xmm2 - movaps %xmm15,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - movdqa %xmm6,%xmm2 - subq $16,%rdx - jmp .Lcbc_dec_tail_collected + cmpq $80,%rdx + jbe .Lcbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe .Lcbc_dec_six + movups 96(%rdi),%xmm8 + movaps %xmm9,64(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps 64(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp .Lcbc_dec_tail_collected .p2align 4 .Lcbc_dec_one: - movaps %xmm2,%xmm11 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -2844,81 +2403,118 @@ aesni_cbc_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_16 + jnz .Loop_dec1_16 .byte 102,15,56,223,209 - xorps %xmm10,%xmm2 - movaps %xmm11,%xmm10 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx jmp .Lcbc_dec_tail_collected .p2align 4 .Lcbc_dec_two: - movaps %xmm3,%xmm12 xorps %xmm4,%xmm4 call _aesni_decrypt3 - pxor %xmm10,%xmm2 - movaps %xmm12,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - movdqa %xmm3,%xmm2 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 leaq 16(%rsi),%rsi + subq $32,%rdx jmp .Lcbc_dec_tail_collected .p2align 4 .Lcbc_dec_three: - movaps %xmm4,%xmm13 call _aesni_decrypt3 - pxor %xmm10,%xmm2 - movaps %xmm13,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - movdqa %xmm4,%xmm2 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 leaq 32(%rsi),%rsi + subq $48,%rdx jmp .Lcbc_dec_tail_collected .p2align 4 .Lcbc_dec_four: - movaps %xmm5,%xmm14 call _aesni_decrypt4 - pxor %xmm10,%xmm2 - movaps %xmm14,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - movdqa %xmm5,%xmm2 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 leaq 48(%rsi),%rsi + subq $64,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx jmp .Lcbc_dec_tail_collected - .p2align 4 .Lcbc_dec_tail_collected: - movups %xmm10,(%r8) andq $15,%rdx + movups %xmm9,(%r8) jnz .Lcbc_dec_tail_partial movups %xmm2,(%rsi) jmp .Lcbc_dec_ret .p2align 4 .Lcbc_dec_tail_partial: - movaps %xmm2,(%rsp) + movaps %xmm2,64(%rsp) movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx - leaq (%rsp),%rsi -.long 0x9066A4F3 + leaq 64(%rsp),%rsi +.long 0x9066A4F3 .Lcbc_dec_ret: - movaps 16(%rsp),%xmm6 - movaps 32(%rsp),%xmm7 - movaps 48(%rsp),%xmm8 - movaps 64(%rsp),%xmm9 - movaps 80(%rsp),%xmm10 - movaps 96(%rsp),%xmm11 - movaps 112(%rsp),%xmm12 - movaps 128(%rsp),%xmm13 - movaps 144(%rsp),%xmm14 - movaps 160(%rsp),%xmm15 - leaq (%rbp),%rsp - popq %rbp + movaps (%rsp),%xmm6 + movaps 16(%rsp),%xmm7 + movaps 32(%rsp),%xmm8 + movaps 48(%rsp),%xmm9 + leaq 88(%rsp),%rsp .Lcbc_ret: movq 8(%rsp),%rdi movq 16(%rsp),%rsi @@ -2928,7 +2524,7 @@ aesni_cbc_encrypt: .def aesni_set_decrypt_key; .scl 2; .type 32; .endef .p2align 4 aesni_set_decrypt_key: -.byte 0x48,0x83,0xEC,0x08 +.byte 0x48,0x83,0xEC,0x08 call __aesni_set_encrypt_key shll $4,%edx testl %eax,%eax @@ -2967,7 +2563,7 @@ aesni_set_decrypt_key: .p2align 4 aesni_set_encrypt_key: __aesni_set_encrypt_key: -.byte 0x48,0x83,0xEC,0x08 +.byte 0x48,0x83,0xEC,0x08 movq $-1,%rax testq %rcx,%rcx jz .Lenc_key_ret @@ -3163,8 +2759,6 @@ __aesni_set_encrypt_key: .long 1,0,0,0 .Lxts_magic: .long 0x87,0,1,0 -.Lincrement1: -.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 @@ -3223,15 +2817,51 @@ ccm64_se_handler: leaq 0(%rax),%rsi leaq 512(%r8),%rdi movl $8,%ecx -.long 0xa548f3fc +.long 0xa548f3fc leaq 88(%rax),%rax jmp .Lcommon_seh_tail -.def ctr_xts_se_handler; .scl 3; .type 32; .endef +.def ctr32_se_handler; .scl 3; .type 32; .endef +.p2align 4 +ctr32_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lctr32_body(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + leaq .Lctr32_ret(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 32(%rax),%rsi + leaq 512(%r8),%rdi + movl $20,%ecx +.long 0xa548f3fc + leaq 200(%rax),%rax + + jmp .Lcommon_seh_tail + + +.def xts_se_handler; .scl 3; .type 32; .endef .p2align 4 -ctr_xts_se_handler: +xts_se_handler: pushq %rsi pushq %rdi pushq %rbx @@ -3261,13 +2891,13 @@ ctr_xts_se_handler: cmpq %r10,%rbx jae .Lcommon_seh_tail - movq 160(%r8),%rax - leaq -160(%rax),%rsi + leaq 96(%rax),%rsi leaq 512(%r8),%rdi movl $20,%ecx -.long 0xa548f3fc +.long 0xa548f3fc + leaq 104+160(%rax),%rax - jmp .Lcommon_rbp_tail + jmp .Lcommon_seh_tail .def cbc_se_handler; .scl 3; .type 32; .endef .p2align 4 @@ -3298,16 +2928,11 @@ cbc_se_handler: cmpq %r10,%rbx jae .Lcommon_seh_tail - leaq 16(%rax),%rsi + leaq 0(%rax),%rsi leaq 512(%r8),%rdi - movl $20,%ecx -.long 0xa548f3fc - -.Lcommon_rbp_tail: - movq 160(%r8),%rax - movq (%rax),%rbp - leaq 8(%rax),%rax - movq %rbp,160(%r8) + movl $8,%ecx +.long 0xa548f3fc + leaq 88(%rax),%rax jmp .Lcommon_seh_tail .Lrestore_cbc_rax: @@ -3323,7 +2948,7 @@ cbc_se_handler: movq 40(%r9),%rdi movq %r8,%rsi movl $154,%ecx -.long 0xa548f3fc +.long 0xa548f3fc movq %r9,%rsi xorq %rcx,%rcx @@ -3397,28 +3022,26 @@ cbc_se_handler: .LSEH_info_ccm64_enc: .byte 9,0,0,0 .rva ccm64_se_handler -.rva .Lccm64_enc_body,.Lccm64_enc_ret +.rva .Lccm64_enc_body,.Lccm64_enc_ret .LSEH_info_ccm64_dec: .byte 9,0,0,0 .rva ccm64_se_handler -.rva .Lccm64_dec_body,.Lccm64_dec_ret +.rva .Lccm64_dec_body,.Lccm64_dec_ret .LSEH_info_ctr32: .byte 9,0,0,0 -.rva ctr_xts_se_handler -.rva .Lctr32_body,.Lctr32_epilogue +.rva ctr32_se_handler .LSEH_info_xts_enc: .byte 9,0,0,0 -.rva ctr_xts_se_handler -.rva .Lxts_enc_body,.Lxts_enc_epilogue +.rva xts_se_handler +.rva .Lxts_enc_body,.Lxts_enc_epilogue .LSEH_info_xts_dec: .byte 9,0,0,0 -.rva ctr_xts_se_handler -.rva .Lxts_dec_body,.Lxts_dec_epilogue +.rva xts_se_handler +.rva .Lxts_dec_body,.Lxts_dec_epilogue .LSEH_info_cbc: .byte 9,0,0,0 .rva cbc_se_handler .LSEH_info_key: .byte 0x01,0x04,0x01,0x00 -.byte 0x04,0x02,0x00,0x00 +.byte 0x04,0x02,0x00,0x00 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/cpuid-x86.s b/lib/accelerated/x86/coff/cpuid-x86.s index 9931ff05ba..0196ae9855 100644 --- a/lib/accelerated/x86/coff/cpuid-x86.s +++ b/lib/accelerated/x86/coff/cpuid-x86.s @@ -70,4 +70,3 @@ _gnutls_have_cpuid: ret .byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/cpuid-x86_64.s b/lib/accelerated/x86/coff/cpuid-x86_64.s index 3add1900cf..5c7135cd96 100644 --- a/lib/accelerated/x86/coff/cpuid-x86_64.s +++ b/lib/accelerated/x86/coff/cpuid-x86_64.s @@ -69,4 +69,3 @@ gnutls_cpuid: .byte 0xf3,0xc3 .LSEH_end_gnutls_cpuid: -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/e_padlock-x86.s b/lib/accelerated/x86/coff/e_padlock-x86.s index 328e6462f6..47dd6d36d4 100644 --- a/lib/accelerated/x86/coff/e_padlock-x86.s +++ b/lib/accelerated/x86/coff/e_padlock-x86.s @@ -180,14 +180,16 @@ _padlock_ecb_encrypt: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx + cmpl $128,%ecx + jbe .L006ecb_short testl $32,(%edx) - jnz .L006ecb_aligned + jnz .L007ecb_aligned testl $15,%edi setz %al testl $15,%esi setz %bl testl %ebx,%eax - jnz .L006ecb_aligned + jnz .L007ecb_aligned negl %eax movl $512,%ebx notl %eax @@ -199,28 +201,10 @@ _padlock_ecb_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - cmpl %ebx,%ecx - ja .L007ecb_loop - movl %esi,%eax - cmpl %esp,%ebp - cmovel %edi,%eax - addl %ecx,%eax - negl %eax - andl $4095,%eax - cmpl $128,%eax - movl $-128,%eax - cmovael %ebx,%eax - andl %eax,%ebx - jz .L008ecb_unaligned_tail - jmp .L007ecb_loop + jmp .L008ecb_loop .align 16 -.L007ecb_loop: +.L008ecb_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -245,8 +229,8 @@ _padlock_ecb_encrypt: testl $15,%edi jz .L010ecb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi .L010ecb_out_aligned: @@ -256,75 +240,43 @@ _padlock_ecb_encrypt: addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jz .L011ecb_break - cmpl %ebx,%ecx - jae .L007ecb_loop -.L008ecb_unaligned_tail: - xorl %eax,%eax + jnz .L008ecb_loop cmpl %ebp,%esp - cmovel %ecx,%eax - subl %eax,%esp - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L007ecb_loop -.align 16 -.L011ecb_break: - cmpl %ebp,%esp - je .L012ecb_done + je .L011ecb_done pxor %xmm0,%xmm0 leal (%esp),%eax -.L013ecb_bzero: +.L012ecb_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L013ecb_bzero -.L012ecb_done: - movl 16(%ebp),%ebp + ja .L012ecb_bzero +.L011ecb_done: leal 24(%ebp),%esp - jmp .L014ecb_exit + jmp .L013ecb_exit .align 16 -.L006ecb_aligned: - leal (%esi,%ecx,1),%ebp - negl %ebp - andl $4095,%ebp +.L006ecb_short: xorl %eax,%eax - cmpl $128,%ebp - movl $127,%ebp - cmovael %eax,%ebp - andl %ecx,%ebp - subl %ebp,%ecx - jz .L015ecb_aligned_tail + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L014ecb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L014ecb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L008ecb_loop +.align 16 +.L007ecb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,200 - testl %ebp,%ebp - jz .L014ecb_exit -.L015ecb_aligned_tail: - movl %ebp,%ecx - leal -24(%esp),%ebp - movl %ebp,%esp - movl %ebp,%eax - subl %ecx,%esp - andl $-16,%ebp - andl $-16,%esp - movl %eax,16(%ebp) - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L007ecb_loop -.L014ecb_exit: +.L013ecb_exit: movl $1,%eax leal 4(%esp),%esp .L004ecb_abort: @@ -347,17 +299,19 @@ _padlock_cbc_encrypt: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz .L016cbc_abort + jnz .L015cbc_abort testl $15,%ecx - jnz .L016cbc_abort + jnz .L015cbc_abort leal .Lpadlock_saved_context,%eax pushfl cld call __padlock_verify_ctx -.L017cbc_pic_point: +.L016cbc_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx + cmpl $64,%ecx + jbe .L017cbc_short testl $32,(%edx) jnz .L018cbc_aligned testl $15,%edi @@ -377,25 +331,7 @@ _padlock_cbc_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - cmpl %ebx,%ecx - ja .L019cbc_loop - movl %esi,%eax - cmpl %esp,%ebp - cmovel %edi,%eax - addl %ecx,%eax - negl %eax - andl $4095,%eax - cmpl $64,%eax - movl $-64,%eax - cmovael %ebx,%eax - andl %eax,%ebx - jz .L020cbc_unaligned_tail jmp .L019cbc_loop .align 16 .L019cbc_loop: @@ -407,13 +343,13 @@ _padlock_cbc_encrypt: testl $15,%edi cmovnzl %esp,%edi testl $15,%esi - jz .L021cbc_inp_aligned + jz .L020cbc_inp_aligned shrl $2,%ecx .byte 243,165 subl %ebx,%edi movl %ebx,%ecx movl %edi,%esi -.L021cbc_inp_aligned: +.L020cbc_inp_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx @@ -423,93 +359,61 @@ _padlock_cbc_encrypt: movl (%ebp),%edi movl 12(%ebp),%ebx testl $15,%edi - jz .L022cbc_out_aligned + jz .L021cbc_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi -.L022cbc_out_aligned: +.L021cbc_out_aligned: movl 4(%ebp),%esi movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jz .L023cbc_break - cmpl %ebx,%ecx - jae .L019cbc_loop -.L020cbc_unaligned_tail: - xorl %eax,%eax - cmpl %ebp,%esp - cmovel %ecx,%eax - subl %eax,%esp - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L019cbc_loop -.align 16 -.L023cbc_break: + jnz .L019cbc_loop cmpl %ebp,%esp - je .L024cbc_done + je .L022cbc_done pxor %xmm0,%xmm0 leal (%esp),%eax -.L025cbc_bzero: +.L023cbc_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L025cbc_bzero -.L024cbc_done: - movl 16(%ebp),%ebp + ja .L023cbc_bzero +.L022cbc_done: leal 24(%ebp),%esp - jmp .L026cbc_exit + jmp .L024cbc_exit .align 16 -.L018cbc_aligned: - leal (%esi,%ecx,1),%ebp - negl %ebp - andl $4095,%ebp +.L017cbc_short: xorl %eax,%eax - cmpl $64,%ebp - movl $63,%ebp - cmovael %eax,%ebp - andl %ecx,%ebp - subl %ebp,%ecx - jz .L027cbc_aligned_tail + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L025cbc_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L025cbc_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L019cbc_loop +.align 16 +.L018cbc_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,208 movaps (%eax),%xmm0 movaps %xmm0,-16(%edx) - testl %ebp,%ebp - jz .L026cbc_exit -.L027cbc_aligned_tail: - movl %ebp,%ecx - leal -24(%esp),%ebp - movl %ebp,%esp - movl %ebp,%eax - subl %ecx,%esp - andl $-16,%ebp - andl $-16,%esp - movl %eax,16(%ebp) - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L019cbc_loop -.L026cbc_exit: +.L024cbc_exit: movl $1,%eax leal 4(%esp),%esp -.L016cbc_abort: +.L015cbc_abort: popl %edi popl %esi popl %ebx @@ -529,25 +433,25 @@ _padlock_cfb_encrypt: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz .L028cfb_abort + jnz .L026cfb_abort testl $15,%ecx - jnz .L028cfb_abort + jnz .L026cfb_abort leal .Lpadlock_saved_context,%eax pushfl cld call __padlock_verify_ctx -.L029cfb_pic_point: +.L027cfb_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx testl $32,(%edx) - jnz .L030cfb_aligned + jnz .L028cfb_aligned testl $15,%edi setz %al testl $15,%esi setz %bl testl %ebx,%eax - jnz .L030cfb_aligned + jnz .L028cfb_aligned negl %eax movl $512,%ebx notl %eax @@ -559,15 +463,10 @@ _padlock_cfb_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - jmp .L031cfb_loop + jmp .L029cfb_loop .align 16 -.L031cfb_loop: +.L029cfb_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -576,13 +475,13 @@ _padlock_cfb_encrypt: testl $15,%edi cmovnzl %esp,%edi testl $15,%esi - jz .L032cfb_inp_aligned + jz .L030cfb_inp_aligned shrl $2,%ecx .byte 243,165 subl %ebx,%edi movl %ebx,%ecx movl %edi,%esi -.L032cfb_inp_aligned: +.L030cfb_inp_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx @@ -592,45 +491,61 @@ _padlock_cfb_encrypt: movl (%ebp),%edi movl 12(%ebp),%ebx testl $15,%edi - jz .L033cfb_out_aligned + jz .L031cfb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi -.L033cfb_out_aligned: +.L031cfb_out_aligned: movl 4(%ebp),%esi movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jnz .L031cfb_loop + jnz .L029cfb_loop cmpl %ebp,%esp - je .L034cfb_done + je .L032cfb_done pxor %xmm0,%xmm0 leal (%esp),%eax -.L035cfb_bzero: +.L033cfb_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L035cfb_bzero -.L034cfb_done: - movl 16(%ebp),%ebp + ja .L033cfb_bzero +.L032cfb_done: leal 24(%ebp),%esp - jmp .L036cfb_exit + jmp .L034cfb_exit .align 16 -.L030cfb_aligned: +.L035cfb_short: + xorl %eax,%eax + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L036cfb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L036cfb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L029cfb_loop +.align 16 +.L028cfb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,224 movaps (%eax),%xmm0 movaps %xmm0,-16(%edx) -.L036cfb_exit: +.L034cfb_exit: movl $1,%eax leal 4(%esp),%esp -.L028cfb_abort: +.L026cfb_abort: popl %edi popl %esi popl %ebx @@ -680,12 +595,7 @@ _padlock_ofb_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) jmp .L040ofb_loop .align 16 .L040ofb_loop: @@ -715,8 +625,8 @@ _padlock_ofb_encrypt: testl $15,%edi jz .L042ofb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi .L042ofb_out_aligned: @@ -737,10 +647,26 @@ _padlock_ofb_encrypt: cmpl %eax,%ebp ja .L044ofb_bzero .L043ofb_done: - movl 16(%ebp),%ebp leal 24(%ebp),%esp jmp .L045ofb_exit .align 16 +.L046ofb_short: + xorl %eax,%eax + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L047ofb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L047ofb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L040ofb_loop +.align 16 .L039ofb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx @@ -771,14 +697,14 @@ _padlock_ctr32_encrypt: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz .L046ctr32_abort + jnz .L048ctr32_abort testl $15,%ecx - jnz .L046ctr32_abort + jnz .L048ctr32_abort leal .Lpadlock_saved_context,%eax pushfl cld call __padlock_verify_ctx -.L047ctr32_pic_point: +.L049ctr32_pic_point: leal 16(%edx),%edx xorl %eax,%eax movq -16(%edx),%mm0 @@ -792,15 +718,10 @@ _padlock_ctr32_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - jmp .L048ctr32_loop + jmp .L050ctr32_loop .align 16 -.L048ctr32_loop: +.L050ctr32_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -809,7 +730,7 @@ _padlock_ctr32_encrypt: movl -4(%edx),%ecx xorl %edi,%edi movl -8(%edx),%eax -.L049ctr32_prepare: +.L051ctr32_prepare: movl %ecx,12(%esp,%edi,1) bswap %ecx movq %mm0,(%esp,%edi,1) @@ -818,7 +739,7 @@ _padlock_ctr32_encrypt: bswap %ecx leal 16(%edi),%edi cmpl %ebx,%edi - jb .L049ctr32_prepare + jb .L051ctr32_prepare movl %ecx,-4(%edx) leal (%esp),%esi leal (%esp),%edi @@ -831,33 +752,32 @@ _padlock_ctr32_encrypt: movl 12(%ebp),%ebx movl 4(%ebp),%esi xorl %ecx,%ecx -.L050ctr32_xor: +.L052ctr32_xor: movups (%esi,%ecx,1),%xmm1 leal 16(%ecx),%ecx pxor -16(%esp,%ecx,1),%xmm1 movups %xmm1,-16(%edi,%ecx,1) cmpl %ebx,%ecx - jb .L050ctr32_xor + jb .L052ctr32_xor movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jnz .L048ctr32_loop + jnz .L050ctr32_loop pxor %xmm0,%xmm0 leal (%esp),%eax -.L051ctr32_bzero: +.L053ctr32_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L051ctr32_bzero -.L052ctr32_done: - movl 16(%ebp),%ebp + ja .L053ctr32_bzero +.L054ctr32_done: leal 24(%ebp),%esp movl $1,%eax leal 4(%esp),%esp emms -.L046ctr32_abort: +.L048ctr32_abort: popl %edi popl %esi popl %ebx @@ -881,10 +801,10 @@ __win32_segv_handler: movl 4(%esp),%edx movl 12(%esp),%ecx cmpl $3221225477,(%edx) - jne .L053ret + jne .L055ret addl $4,184(%ecx) movl $0,%eax -.L053ret: +.L055ret: ret .globl _padlock_sha1_oneshot .def _padlock_sha1_oneshot; .scl 2; .type 32; .endef @@ -1060,4 +980,3 @@ _padlock_sha512_blocks: .Lpadlock_saved_context: .long 0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/e_padlock-x86_64.s b/lib/accelerated/x86/coff/e_padlock-x86_64.s index 56c70655ec..b9b93cc592 100644 --- a/lib/accelerated/x86/coff/e_padlock-x86_64.s +++ b/lib/accelerated/x86/coff/e_padlock-x86_64.s @@ -135,7 +135,7 @@ padlock_aes_block: movq $1,%rcx leaq 32(%rdx),%rbx leaq 16(%rdx),%rdx -.byte 0xf3,0x0f,0xa7,0xc8 +.byte 0xf3,0x0f,0xa7,0xc8 movq %r8,%rbx movq 8(%rsp),%rdi movq 16(%rsp),%rsi @@ -154,7 +154,7 @@ padlock_xstore: movq %rdx,%rsi movl %esi,%edx -.byte 0x0f,0xa7,0xc0 +.byte 0x0f,0xa7,0xc0 movq 8(%rsp),%rdi movq 16(%rsp),%rsi .byte 0xf3,0xc3 @@ -181,7 +181,7 @@ padlock_sha1_oneshot: movq %rsp,%rdi movl %eax,16(%rsp) xorq %rax,%rax -.byte 0xf3,0x0f,0xa6,0xc8 +.byte 0xf3,0x0f,0xa6,0xc8 movaps (%rsp),%xmm0 movl 16(%rsp),%eax addq $128+8,%rsp @@ -213,7 +213,7 @@ padlock_sha1_blocks: movq %rsp,%rdi movl %eax,16(%rsp) movq $-1,%rax -.byte 0xf3,0x0f,0xa6,0xc8 +.byte 0xf3,0x0f,0xa6,0xc8 movaps (%rsp),%xmm0 movl 16(%rsp),%eax addq $128+8,%rsp @@ -245,7 +245,7 @@ padlock_sha256_oneshot: movq %rsp,%rdi movaps %xmm1,16(%rsp) xorq %rax,%rax -.byte 0xf3,0x0f,0xa6,0xd0 +.byte 0xf3,0x0f,0xa6,0xd0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 addq $128+8,%rsp @@ -277,7 +277,7 @@ padlock_sha256_blocks: movq %rsp,%rdi movaps %xmm1,16(%rsp) movq $-1,%rax -.byte 0xf3,0x0f,0xa6,0xd0 +.byte 0xf3,0x0f,0xa6,0xd0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 addq $128+8,%rsp @@ -312,7 +312,7 @@ padlock_sha512_blocks: movaps %xmm1,16(%rsp) movaps %xmm2,32(%rsp) movaps %xmm3,48(%rsp) -.byte 0xf3,0x0f,0xa6,0xe0 +.byte 0xf3,0x0f,0xa6,0xe0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movaps 32(%rsp),%xmm2 @@ -354,6 +354,8 @@ padlock_ecb_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $128,%rcx + jbe .Lecb_short testl $32,(%rdx) jnz .Lecb_aligned testq $15,%rdi @@ -373,21 +375,6 @@ padlock_ecb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx - cmpq %rbx,%rcx - ja .Lecb_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $128,%rax - movq $-128,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz .Lecb_unaligned_tail jmp .Lecb_loop .p2align 4 .Lecb_loop: @@ -403,7 +390,7 @@ padlock_ecb_encrypt: testq $15,%rsi jz .Lecb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -411,15 +398,15 @@ padlock_ecb_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,200 +.byte 0xf3,0x0f,0xa7,200 movq %r8,%rdi movq %r11,%rbx testq $15,%rdi jz .Lecb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lecb_out_aligned: movq %r9,%rsi @@ -428,26 +415,9 @@ padlock_ecb_encrypt: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz .Lecb_break - cmpq %rbx,%rcx - jae .Lecb_loop -.Lecb_unaligned_tail: - xorl %eax,%eax + jnz .Lecb_loop + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp .Lecb_loop -.p2align 4 -.Lecb_break: - cmpq %rbp,%rsp je .Lecb_done pxor %xmm0,%xmm0 @@ -461,39 +431,26 @@ padlock_ecb_encrypt: .Lecb_done: leaq (%rbp),%rsp jmp .Lecb_exit - +.p2align 4 +.Lecb_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +.Lecb_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja .Lecb_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp .Lecb_loop .p2align 4 .Lecb_aligned: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $128,%rbp - movq $128-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz .Lecb_aligned_tail leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,200 - testq %rbp,%rbp - jz .Lecb_exit - -.Lecb_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp .Lecb_loop +.byte 0xf3,0x0f,0xa7,200 .Lecb_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -532,6 +489,8 @@ padlock_cbc_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $64,%rcx + jbe .Lcbc_short testl $32,(%rdx) jnz .Lcbc_aligned testq $15,%rdi @@ -551,21 +510,6 @@ padlock_cbc_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx - cmpq %rbx,%rcx - ja .Lcbc_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $64,%rax - movq $-64,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz .Lcbc_unaligned_tail jmp .Lcbc_loop .p2align 4 .Lcbc_loop: @@ -581,7 +525,7 @@ padlock_cbc_encrypt: testq $15,%rsi jz .Lcbc_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -589,7 +533,7 @@ padlock_cbc_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,208 +.byte 0xf3,0x0f,0xa7,208 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -597,9 +541,9 @@ padlock_cbc_encrypt: testq $15,%rdi jz .Lcbc_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lcbc_out_aligned: movq %r9,%rsi @@ -608,26 +552,9 @@ padlock_cbc_encrypt: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz .Lcbc_break - cmpq %rbx,%rcx - jae .Lcbc_loop -.Lcbc_unaligned_tail: - xorl %eax,%eax + jnz .Lcbc_loop + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp .Lcbc_loop -.p2align 4 -.Lcbc_break: - cmpq %rbp,%rsp je .Lcbc_done pxor %xmm0,%xmm0 @@ -641,41 +568,28 @@ padlock_cbc_encrypt: .Lcbc_done: leaq (%rbp),%rsp jmp .Lcbc_exit - +.p2align 4 +.Lcbc_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +.Lcbc_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja .Lcbc_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp .Lcbc_loop .p2align 4 .Lcbc_aligned: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $64,%rbp - movq $64-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz .Lcbc_aligned_tail leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,208 +.byte 0xf3,0x0f,0xa7,208 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) - testq %rbp,%rbp - jz .Lcbc_exit - -.Lcbc_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp .Lcbc_loop .Lcbc_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -733,8 +647,6 @@ padlock_cfb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx jmp .Lcfb_loop .p2align 4 .Lcfb_loop: @@ -750,7 +662,7 @@ padlock_cfb_encrypt: testq $15,%rsi jz .Lcfb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -758,7 +670,7 @@ padlock_cfb_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,224 +.byte 0xf3,0x0f,0xa7,224 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -766,9 +678,9 @@ padlock_cfb_encrypt: testq $15,%rdi jz .Lcfb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lcfb_out_aligned: movq %r9,%rsi @@ -778,7 +690,8 @@ padlock_cfb_encrypt: subq %rbx,%rcx movq $512,%rbx jnz .Lcfb_loop - cmpq %rbp,%rsp + + cmpq %rsp,%rbp je .Lcfb_done pxor %xmm0,%xmm0 @@ -792,13 +705,12 @@ padlock_cfb_encrypt: .Lcfb_done: leaq (%rbp),%rsp jmp .Lcfb_exit - .p2align 4 .Lcfb_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,224 +.byte 0xf3,0x0f,0xa7,224 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) .Lcfb_exit: @@ -858,8 +770,6 @@ padlock_ofb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx jmp .Lofb_loop .p2align 4 .Lofb_loop: @@ -875,7 +785,7 @@ padlock_ofb_encrypt: testq $15,%rsi jz .Lofb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -883,7 +793,7 @@ padlock_ofb_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,232 +.byte 0xf3,0x0f,0xa7,232 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -891,9 +801,9 @@ padlock_ofb_encrypt: testq $15,%rdi jz .Lofb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lofb_out_aligned: movq %r9,%rsi @@ -903,7 +813,8 @@ padlock_ofb_encrypt: subq %rbx,%rcx movq $512,%rbx jnz .Lofb_loop - cmpq %rbp,%rsp + + cmpq %rsp,%rbp je .Lofb_done pxor %xmm0,%xmm0 @@ -917,13 +828,12 @@ padlock_ofb_encrypt: .Lofb_done: leaq (%rbp),%rsp jmp .Lofb_exit - .p2align 4 .Lofb_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,232 +.byte 0xf3,0x0f,0xa7,232 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) .Lofb_exit: @@ -964,6 +874,8 @@ padlock_ctr32_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $64,%rcx + jbe .Lctr32_short testl $32,(%rdx) jnz .Lctr32_aligned testq $15,%rdi @@ -983,32 +895,15 @@ padlock_ctr32_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx .Lctr32_reenter: movl -4(%rdx),%eax bswapl %eax negl %eax andl $31,%eax - movq $512,%rbx + jz .Lctr32_loop shll $4,%eax - cmovzq %rbx,%rax cmpq %rax,%rcx cmovaq %rax,%rbx - cmovbeq %rcx,%rbx - cmpq %rbx,%rcx - ja .Lctr32_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $32,%rax - movq $-32,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz .Lctr32_unaligned_tail jmp .Lctr32_loop .p2align 4 .Lctr32_loop: @@ -1024,7 +919,7 @@ padlock_ctr32_encrypt: testq $15,%rsi jz .Lctr32_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -1032,23 +927,23 @@ padlock_ctr32_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 +.byte 0xf3,0x0f,0xa7,216 movl -4(%rdx),%eax testl $4294901760,%eax - jnz .Lctr32_no_carry + jnz .Lctr32_no_corr bswapl %eax addl $65536,%eax bswapl %eax movl %eax,-4(%rdx) -.Lctr32_no_carry: +.Lctr32_no_corr: movq %r8,%rdi movq %r11,%rbx testq $15,%rdi jz .Lctr32_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lctr32_out_aligned: movq %r9,%rsi @@ -1057,38 +952,9 @@ padlock_ctr32_encrypt: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz .Lctr32_break - cmpq %rbx,%rcx - jae .Lctr32_loop - movq %rcx,%rbx - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $32,%rax - movq $-32,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx jnz .Lctr32_loop -.Lctr32_unaligned_tail: - xorl %eax,%eax + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp .Lctr32_loop -.p2align 4 -.Lctr32_break: - cmpq %rbp,%rsp je .Lctr32_done pxor %xmm0,%xmm0 @@ -1102,75 +968,56 @@ padlock_ctr32_encrypt: .Lctr32_done: leaq (%rbp),%rsp jmp .Lctr32_exit - +.p2align 4 +.Lctr32_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +.Lctr32_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja .Lctr32_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp .Lctr32_reenter .p2align 4 .Lctr32_aligned: movl -4(%rdx),%eax + movq $1048576,%rbx bswapl %eax + cmpq %rcx,%rbx + cmovaq %rcx,%rbx negl %eax andl $65535,%eax - movq $1048576,%rbx + jz .Lctr32_aligned_loop shll $4,%eax - cmovzq %rbx,%rax cmpq %rax,%rcx cmovaq %rax,%rbx - cmovbeq %rcx,%rbx - jbe .Lctr32_aligned_skip - + jmp .Lctr32_aligned_loop +.p2align 4 .Lctr32_aligned_loop: + cmpq %rcx,%rbx + cmovaq %rcx,%rbx movq %rcx,%r10 movq %rbx,%rcx movq %rbx,%r11 - leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 - +.byte 0xf3,0x0f,0xa7,216 movl -4(%rdx),%eax bswapl %eax addl $65536,%eax bswapl %eax movl %eax,-4(%rdx) + movq %r11,%rbx movq %r10,%rcx - subq %r11,%rcx + subq %rbx,%rcx movq $1048576,%rbx - jz .Lctr32_exit - cmpq %rbx,%rcx - jae .Lctr32_aligned_loop - -.Lctr32_aligned_skip: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $32,%rbp - movq $32-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz .Lctr32_aligned_tail - leaq -16(%rdx),%rax - leaq 16(%rdx),%rbx - shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 - testq %rbp,%rbp - jz .Lctr32_exit - -.Lctr32_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp .Lctr32_loop + jnz .Lctr32_aligned_loop .Lctr32_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -1188,4 +1035,3 @@ padlock_ctr32_encrypt: .Lpadlock_saved_context: .quad 0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/ghash-x86_64.s b/lib/accelerated/x86/coff/ghash-x86_64.s index 7ecf821a8d..7a89b7a599 100644 --- a/lib/accelerated/x86/coff/ghash-x86_64.s +++ b/lib/accelerated/x86/coff/ghash-x86_64.s @@ -717,11 +717,6 @@ gcm_ghash_4bit: .def gcm_init_clmul; .scl 2; .type 32; .endef .p2align 4 gcm_init_clmul: -.L_init_clmul: -.LSEH_begin_gcm_init_clmul: - -.byte 0x48,0x83,0xec,0x18 -.byte 0x0f,0x29,0x34,0x24 movdqu (%rdx),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -740,15 +735,15 @@ gcm_init_clmul: pxor %xmm5,%xmm2 - pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 - pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -758,137 +753,44 @@ gcm_init_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - pshufd $78,%xmm2,%xmm3 - pshufd $78,%xmm0,%xmm4 - pxor %xmm2,%xmm3 - movdqu %xmm2,0(%rcx) - pxor %xmm0,%xmm4 - movdqu %xmm0,16(%rcx) -.byte 102,15,58,15,227,8 - movdqu %xmm4,32(%rcx) - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,15,58,68,194,0 -.byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - movdqa %xmm0,%xmm5 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,15,58,68,194,0 -.byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 + pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - pshufd $78,%xmm5,%xmm3 - pshufd $78,%xmm0,%xmm4 - pxor %xmm5,%xmm3 - movdqu %xmm5,48(%rcx) - pxor %xmm0,%xmm4 - movdqu %xmm0,64(%rcx) -.byte 102,15,58,15,227,8 - movdqu %xmm4,80(%rcx) - movaps (%rsp),%xmm6 - leaq 24(%rsp),%rsp -.LSEH_end_gcm_init_clmul: + movdqu %xmm2,(%rcx) + movdqu %xmm0,16(%rcx) .byte 0xf3,0xc3 .globl gcm_gmult_clmul .def gcm_gmult_clmul; .scl 2; .type 32; .endef .p2align 4 gcm_gmult_clmul: -.L_gmult_clmul: movdqu (%rcx),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 movdqu (%rdx),%xmm2 - movdqu 32(%rdx),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 @@ -901,372 +803,194 @@ gcm_gmult_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rcx) .byte 0xf3,0xc3 .globl gcm_ghash_clmul .def gcm_ghash_clmul; .scl 2; .type 32; .endef -.p2align 5 +.p2align 4 gcm_ghash_clmul: -.L_ghash_clmul: - leaq -136(%rsp),%rax .LSEH_begin_gcm_ghash_clmul: -.byte 0x48,0x8d,0x60,0xe0 -.byte 0x0f,0x29,0x70,0xe0 -.byte 0x0f,0x29,0x78,0xf0 -.byte 0x44,0x0f,0x29,0x00 -.byte 0x44,0x0f,0x29,0x48,0x10 -.byte 0x44,0x0f,0x29,0x50,0x20 -.byte 0x44,0x0f,0x29,0x58,0x30 -.byte 0x44,0x0f,0x29,0x60,0x40 -.byte 0x44,0x0f,0x29,0x68,0x50 -.byte 0x44,0x0f,0x29,0x70,0x60 -.byte 0x44,0x0f,0x29,0x78,0x70 +.byte 0x48,0x83,0xec,0x58 +.byte 0x0f,0x29,0x34,0x24 +.byte 0x0f,0x29,0x7c,0x24,0x10 +.byte 0x44,0x0f,0x29,0x44,0x24,0x20 +.byte 0x44,0x0f,0x29,0x4c,0x24,0x30 +.byte 0x44,0x0f,0x29,0x54,0x24,0x40 movdqa .Lbswap_mask(%rip),%xmm5 - movq $11547335547999543296,%rax movdqu (%rcx),%xmm0 movdqu (%rdx),%xmm2 - movdqu 32(%rdx),%xmm10 .byte 102,15,56,0,197 subq $16,%r9 jz .Lodd_tail - movdqu 16(%rdx),%xmm9 - cmpq $48,%r9 - jb .Lskip4x + movdqu 16(%rdx),%xmm8 - subq $48,%r9 - movdqu 48(%rdx),%xmm14 - movdqu 64(%rdx),%xmm15 - movdqu 48(%r8),%xmm6 - movdqu 32(%r8),%xmm11 -.byte 102,15,56,0,245 -.byte 102,68,15,56,0,221 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 - pxor %xmm6,%xmm7 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,250,0 - - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,217,0 -.byte 102,69,15,58,68,233,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,16 - xorps %xmm13,%xmm8 - movups 80(%rdx),%xmm10 - xorps %xmm12,%xmm7 - - movdqu 16(%r8),%xmm11 - movdqu 0(%r8),%xmm3 -.byte 102,68,15,56,0,221 + movdqu (%r8),%xmm3 + movdqu 16(%r8),%xmm6 .byte 102,15,56,0,221 - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 - pxor %xmm3,%xmm0 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,222,0 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - - leaq 64(%r8),%r8 - subq $64,%r9 - jc .Ltail4x - - jmp .Lmod4_loop -.p2align 5 -.Lmod4_loop: -.byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 - movdqu 48(%r8),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 - movdqu 32(%r8),%xmm6 - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 - pxor %xmm11,%xmm12 .byte 102,15,56,0,245 - movups 32(%rdx),%xmm10 -.byte 102,68,15,58,68,218,0 - xorps %xmm7,%xmm3 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 - pxor %xmm0,%xmm3 - pxor %xmm6,%xmm7 - pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 - pslldq $8,%xmm3 -.byte 102,68,15,58,68,234,17 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - movdqa .L7_mask(%rip),%xmm3 - pxor %xmm4,%xmm1 -.byte 102,72,15,110,224 - - pand %xmm0,%xmm3 -.byte 102,15,56,0,227 -.byte 102,69,15,58,68,226,0 - pxor %xmm0,%xmm4 - psllq $57,%xmm4 - movdqa %xmm4,%xmm3 - pslldq $8,%xmm4 -.byte 102,65,15,58,68,241,0 psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - movdqu 0(%r8),%xmm3 - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 -.byte 102,69,15,58,68,193,17 - xorps %xmm11,%xmm6 - movdqu 16(%r8),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,250,16 - xorps %xmm13,%xmm8 - movups 80(%rdx),%xmm10 -.byte 102,15,56,0,221 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - - movdqa %xmm11,%xmm13 - pxor %xmm12,%xmm7 - pshufd $78,%xmm11,%xmm12 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,222,0 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - psrlq $1,%xmm0 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 - pxor %xmm1,%xmm0 - -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 - leaq 64(%r8),%r8 - subq $64,%r9 - jnc .Lmod4_loop - -.Ltail4x: -.byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 -.byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 - pxor %xmm0,%xmm1 - pxor %xmm7,%xmm3 + leaq 32(%r8),%r8 + subq $32,%r9 + jbe .Leven_tail +.Lmod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 - pxor %xmm0,%xmm1 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - addq $64,%r9 - jz .Ldone - movdqu 32(%rdx),%xmm10 - subq $16,%r9 - jz .Lodd_tail -.Lskip4x: - - - - - movdqu (%r8),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + movdqu 16(%r8),%xmm6 .byte 102,15,56,0,221 .byte 102,15,56,0,245 - pxor %xmm3,%xmm0 - - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm3 - pxor %xmm6,%xmm3 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,218,0 - - leaq 32(%r8),%r8 - subq $32,%r9 - jbe .Leven_tail - jmp .Lmod_loop - -.p2align 5 -.Lmod_loop: - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm4 - pxor %xmm0,%xmm4 - -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 - - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 - movdqu (%r8),%xmm8 -.byte 102,68,15,56,0,197 - movdqu 16(%r8),%xmm6 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - pxor %xmm8,%xmm1 - pxor %xmm3,%xmm4 -.byte 102,15,56,0,245 - movdqa %xmm4,%xmm3 - psrldq $8,%xmm3 - pslldq $8,%xmm4 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - movdqa %xmm6,%xmm8 - - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 -.byte 102,15,58,68,242,0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - pshufd $78,%xmm8,%xmm3 - pxor %xmm8,%xmm3 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 -.byte 102,68,15,58,68,194,17 +.byte 102,15,58,68,250,17 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 -.byte 102,65,15,58,68,218,0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 leaq 32(%r8),%r8 subq $32,%r9 ja .Lmod_loop .Leven_tail: - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm4 - pxor %xmm0,%xmm4 - -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 - - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 - pxor %xmm3,%xmm4 - movdqa %xmm4,%xmm3 + + movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 testq %r9,%r9 jnz .Ldone @@ -1276,10 +1000,12 @@ gcm_ghash_clmul: pxor %xmm3,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,65,15,58,68,218,0 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -1289,28 +1015,27 @@ gcm_ghash_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 .Ldone: .byte 102,15,56,0,197 movdqu %xmm0,(%rcx) @@ -1319,42 +1044,15 @@ gcm_ghash_clmul: movaps 32(%rsp),%xmm8 movaps 48(%rsp),%xmm9 movaps 64(%rsp),%xmm10 - movaps 80(%rsp),%xmm11 - movaps 96(%rsp),%xmm12 - movaps 112(%rsp),%xmm13 - movaps 128(%rsp),%xmm14 - movaps 144(%rsp),%xmm15 - leaq 168(%rsp),%rsp -.LSEH_end_gcm_ghash_clmul: + addq $88,%rsp .byte 0xf3,0xc3 - -.globl gcm_init_avx -.def gcm_init_avx; .scl 2; .type 32; .endef -.p2align 5 -gcm_init_avx: - jmp .L_init_clmul - -.globl gcm_gmult_avx -.def gcm_gmult_avx; .scl 2; .type 32; .endef -.p2align 5 -gcm_gmult_avx: - jmp .L_gmult_clmul - -.globl gcm_ghash_avx -.def gcm_ghash_avx; .scl 2; .type 32; .endef -.p2align 5 -gcm_ghash_avx: - jmp .L_ghash_clmul +.LSEH_end_gcm_ghash_clmul: .p2align 6 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 -.L7_mask: -.long 7,0,7,0 -.L7_mask_poly: -.long 7,0,450,0 .p2align 6 .Lrem_4bit: @@ -1451,7 +1149,7 @@ se_handler: movq 40(%r9),%rdi movq %r8,%rsi movl $154,%ecx -.long 0xa548f3fc +.long 0xa548f3fc movq %r9,%rsi xorq %rcx,%rcx @@ -1491,39 +1189,26 @@ se_handler: .rva .LSEH_end_gcm_ghash_4bit .rva .LSEH_info_gcm_ghash_4bit -.rva .LSEH_begin_gcm_init_clmul -.rva .LSEH_end_gcm_init_clmul -.rva .LSEH_info_gcm_init_clmul - .rva .LSEH_begin_gcm_ghash_clmul .rva .LSEH_end_gcm_ghash_clmul .rva .LSEH_info_gcm_ghash_clmul + .section .xdata .p2align 3 .LSEH_info_gcm_gmult_4bit: .byte 9,0,0,0 .rva se_handler -.rva .Lgmult_prologue,.Lgmult_epilogue +.rva .Lgmult_prologue,.Lgmult_epilogue .LSEH_info_gcm_ghash_4bit: .byte 9,0,0,0 .rva se_handler -.rva .Lghash_prologue,.Lghash_epilogue -.LSEH_info_gcm_init_clmul: -.byte 0x01,0x08,0x03,0x00 -.byte 0x08,0x68,0x00,0x00 -.byte 0x04,0x22,0x00,0x00 +.rva .Lghash_prologue,.Lghash_epilogue .LSEH_info_gcm_ghash_clmul: -.byte 0x01,0x33,0x16,0x00 -.byte 0x33,0xf8,0x09,0x00 -.byte 0x2e,0xe8,0x08,0x00 -.byte 0x29,0xd8,0x07,0x00 -.byte 0x24,0xc8,0x06,0x00 -.byte 0x1f,0xb8,0x05,0x00 -.byte 0x1a,0xa8,0x04,0x00 -.byte 0x15,0x98,0x03,0x00 -.byte 0x10,0x88,0x02,0x00 -.byte 0x0c,0x78,0x01,0x00 -.byte 0x08,0x68,0x00,0x00 -.byte 0x04,0x01,0x15,0x00 - -.section .note.GNU-stack,"",%progbits +.byte 0x01,0x1f,0x0b,0x00 +.byte 0x1f,0xa8,0x04,0x00 +.byte 0x19,0x98,0x03,0x00 +.byte 0x13,0x88,0x02,0x00 +.byte 0x0d,0x78,0x01,0x00 +.byte 0x08,0x68,0x00,0x00 +.byte 0x04,0xa2,0x00,0x00 + diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86.s b/lib/accelerated/x86/coff/sha1-ssse3-x86.s index 450f574e16..22c17e7353 100644 --- a/lib/accelerated/x86/coff/sha1-ssse3-x86.s +++ b/lib/accelerated/x86/coff/sha1-ssse3-x86.s @@ -1417,4 +1417,3 @@ _sha1_block_data_order: .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s index e2f7a1e595..9aa029f1bb 100644 --- a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s +++ b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s @@ -54,7 +54,6 @@ sha1_block_data_order: movl _gnutls_x86_cpuid_s+0(%rip),%r9d movl _gnutls_x86_cpuid_s+4(%rip),%r8d - movl _gnutls_x86_cpuid_s+8(%rip),%r10d testl $512,%r8d jz .Lialu jmp _ssse3_shortcut @@ -1353,13 +1352,12 @@ _ssse3_shortcut: pushq %rbx pushq %rbp pushq %r12 - leaq -160(%rsp),%rsp + leaq -144(%rsp),%rsp movaps %xmm6,64+0(%rsp) movaps %xmm7,64+16(%rsp) movaps %xmm8,64+32(%rsp) movaps %xmm9,64+48(%rsp) movaps %xmm10,64+64(%rsp) - movaps %xmm11,64+80(%rsp) .Lprologue_ssse3: movq %rdi,%r8 movq %rsi,%r9 @@ -1367,7 +1365,7 @@ _ssse3_shortcut: shlq $6,%r10 addq %r9,%r10 - leaq K_XX_XX+64(%rip),%r11 + leaq K_XX_XX(%rip),%r11 movl 0(%r8),%eax movl 4(%r8),%ebx @@ -1375,12 +1373,9 @@ _ssse3_shortcut: movl 12(%r8),%edx movl %ebx,%esi movl 16(%r8),%ebp - movl %ecx,%edi - xorl %edx,%edi - andl %edi,%esi movdqa 64(%r11),%xmm6 - movdqa -64(%r11),%xmm9 + movdqa 0(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -1403,881 +1398,903 @@ _ssse3_shortcut: .p2align 4 .Loop_ssse3: movdqa %xmm1,%xmm4 - rorl $2,%ebx - xorl %edx,%esi + addl 0(%rsp),%ebp + xorl %edx,%ecx movdqa %xmm3,%xmm8 .byte 102,15,58,15,224,8 movl %eax,%edi - addl 0(%rsp),%ebp - paddd %xmm3,%xmm9 - xorl %ecx,%ebx roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx psrldq $4,%xmm8 - addl %esi,%ebp - andl %ebx,%edi - pxor %xmm0,%xmm4 - xorl %ecx,%ebx + xorl %edx,%esi addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp pxor %xmm2,%xmm8 - rorl $7,%eax - xorl %ecx,%edi - movl %ebp,%esi addl 4(%rsp),%edx - pxor %xmm8,%xmm4 - xorl %ebx,%eax + xorl %ecx,%ebx + movl %ebp,%esi roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx movdqa %xmm9,48(%rsp) - addl %edi,%edx - andl %eax,%esi + xorl %ecx,%edi + addl %ebp,%edx movdqa %xmm4,%xmm10 movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx xorl %ebx,%eax - addl %ebp,%edx - rorl $7,%ebp - xorl %ebx,%esi pslldq $12,%xmm10 paddd %xmm4,%xmm4 movl %edx,%edi - addl 8(%rsp),%ecx - xorl %eax,%ebp roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax psrld $31,%xmm8 - addl %esi,%ecx - andl %ebp,%edi - movdqa %xmm10,%xmm9 - xorl %eax,%ebp + xorl %ebx,%esi addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx psrld $30,%xmm10 por %xmm8,%xmm4 - rorl $7,%edx - xorl %eax,%edi - movl %ecx,%esi addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx pslld $2,%xmm9 pxor %xmm10,%xmm4 - xorl %ebp,%edx - roll $5,%ecx - movdqa -64(%r11),%xmm10 - addl %edi,%ebx - andl %edx,%esi - pxor %xmm9,%xmm4 - xorl %ebp,%edx + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx movdqa %xmm2,%xmm5 - rorl $7,%ecx - xorl %ebp,%esi + addl 16(%rsp),%eax + xorl %ebp,%edx movdqa %xmm4,%xmm9 .byte 102,15,58,15,233,8 movl %ebx,%edi - addl 16(%rsp),%eax - paddd %xmm4,%xmm10 - xorl %edx,%ecx roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx psrldq $4,%xmm9 - addl %esi,%eax - andl %ecx,%edi - pxor %xmm1,%xmm5 - xorl %edx,%ecx + xorl %ebp,%esi addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax pxor %xmm3,%xmm9 - rorl $7,%ebx - xorl %edx,%edi - movl %eax,%esi addl 20(%rsp),%ebp - pxor %xmm9,%xmm5 - xorl %ecx,%ebx + xorl %edx,%ecx + movl %eax,%esi roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx movdqa %xmm10,0(%rsp) - addl %edi,%ebp - andl %ebx,%esi + xorl %edx,%edi + addl %eax,%ebp movdqa %xmm5,%xmm8 movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx xorl %ecx,%ebx - addl %eax,%ebp - rorl $7,%eax - xorl %ecx,%esi pslldq $12,%xmm8 paddd %xmm5,%xmm5 movl %ebp,%edi - addl 24(%rsp),%edx - xorl %ebx,%eax roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx psrld $31,%xmm9 - addl %esi,%edx - andl %eax,%edi - movdqa %xmm8,%xmm10 - xorl %ebx,%eax + xorl %ecx,%esi addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx psrld $30,%xmm8 por %xmm9,%xmm5 - rorl $7,%ebp - xorl %ebx,%edi - movl %edx,%esi addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx pslld $2,%xmm10 pxor %xmm8,%xmm5 - xorl %eax,%ebp - roll $5,%edx - movdqa -32(%r11),%xmm8 - addl %edi,%ecx - andl %ebp,%esi - pxor %xmm10,%xmm5 - xorl %eax,%ebp + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx movdqa %xmm3,%xmm6 - rorl $7,%edx - xorl %eax,%esi + addl 32(%rsp),%ebx + xorl %eax,%ebp movdqa %xmm5,%xmm10 .byte 102,15,58,15,242,8 movl %ecx,%edi - addl 32(%rsp),%ebx - paddd %xmm5,%xmm8 - xorl %ebp,%edx roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp psrldq $4,%xmm10 - addl %esi,%ebx - andl %edx,%edi - pxor %xmm2,%xmm6 - xorl %ebp,%edx + xorl %eax,%esi addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx pxor %xmm4,%xmm10 - rorl $7,%ecx - xorl %ebp,%edi - movl %ebx,%esi addl 36(%rsp),%eax - pxor %xmm10,%xmm6 - xorl %edx,%ecx + xorl %ebp,%edx + movl %ebx,%esi roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx movdqa %xmm8,16(%rsp) - addl %edi,%eax - andl %ecx,%esi + xorl %ebp,%edi + addl %ebx,%eax movdqa %xmm6,%xmm9 movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp xorl %edx,%ecx - addl %ebx,%eax - rorl $7,%ebx - xorl %edx,%esi pslldq $12,%xmm9 paddd %xmm6,%xmm6 movl %eax,%edi - addl 40(%rsp),%ebp - xorl %ecx,%ebx roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx psrld $31,%xmm10 - addl %esi,%ebp - andl %ebx,%edi - movdqa %xmm9,%xmm8 - xorl %ecx,%ebx + xorl %edx,%esi addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp psrld $30,%xmm9 por %xmm10,%xmm6 - rorl $7,%eax - xorl %ecx,%edi - movl %ebp,%esi addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp pslld $2,%xmm8 pxor %xmm9,%xmm6 - xorl %ebx,%eax - roll $5,%ebp - movdqa -32(%r11),%xmm9 - addl %edi,%edx - andl %eax,%esi - pxor %xmm8,%xmm6 - xorl %ebx,%eax + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx movdqa %xmm4,%xmm7 - rorl $7,%ebp - xorl %ebx,%esi + addl 48(%rsp),%ecx + xorl %ebx,%eax movdqa %xmm6,%xmm8 .byte 102,15,58,15,251,8 movl %edx,%edi - addl 48(%rsp),%ecx - paddd %xmm6,%xmm9 - xorl %eax,%ebp roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax psrldq $4,%xmm8 - addl %esi,%ecx - andl %ebp,%edi - pxor %xmm3,%xmm7 - xorl %eax,%ebp + xorl %ebx,%esi addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx pxor %xmm5,%xmm8 - rorl $7,%edx - xorl %eax,%edi - movl %ecx,%esi addl 52(%rsp),%ebx - pxor %xmm8,%xmm7 - xorl %ebp,%edx + xorl %eax,%ebp + movl %ecx,%esi roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp movdqa %xmm9,32(%rsp) - addl %edi,%ebx - andl %edx,%esi + xorl %eax,%edi + addl %ecx,%ebx movdqa %xmm7,%xmm10 movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax xorl %ebp,%edx - addl %ecx,%ebx - rorl $7,%ecx - xorl %ebp,%esi pslldq $12,%xmm10 paddd %xmm7,%xmm7 movl %ebx,%edi - addl 56(%rsp),%eax - xorl %edx,%ecx roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx psrld $31,%xmm8 - addl %esi,%eax - andl %ecx,%edi - movdqa %xmm10,%xmm9 - xorl %edx,%ecx + xorl %ebp,%esi addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax psrld $30,%xmm10 por %xmm8,%xmm7 - rorl $7,%ebx - xorl %edx,%edi - movl %eax,%esi addl 60(%rsp),%ebp - pslld $2,%xmm9 - pxor %xmm10,%xmm7 - xorl %ecx,%ebx - roll $5,%eax - movdqa -32(%r11),%xmm10 - addl %edi,%ebp - andl %ebx,%esi - pxor %xmm9,%xmm7 - xorl %ecx,%ebx + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp movdqa %xmm7,%xmm9 - rorl $7,%eax + addl 0(%rsp),%edx pxor %xmm4,%xmm0 .byte 102,68,15,58,15,206,8 - xorl %ecx,%esi + xorl %ecx,%ebx movl %ebp,%edi - addl 0(%rsp),%edx - pxor %xmm1,%xmm0 - xorl %ebx,%eax roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx movdqa %xmm10,%xmm8 paddd %xmm7,%xmm10 - addl %esi,%edx - andl %eax,%edi + xorl %ecx,%esi + addl %ebp,%edx pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx xorl %ebx,%eax - addl %ebp,%edx - rorl $7,%ebp - xorl %ebx,%edi movdqa %xmm0,%xmm9 movdqa %xmm10,48(%rsp) movl %edx,%esi - addl 4(%rsp),%ecx - xorl %eax,%ebp roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax pslld $2,%xmm0 - addl %edi,%ecx - andl %ebp,%esi + xorl %ebx,%edi + addl %edx,%ecx psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edx - xorl %eax,%esi movl %ecx,%edi - addl 8(%rsp),%ebx - por %xmm9,%xmm0 - xorl %ebp,%edx roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp movdqa %xmm0,%xmm10 - addl %esi,%ebx - andl %edx,%edi - xorl %ebp,%edx + xorl %eax,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 12(%rsp),%eax - xorl %ebp,%edi + xorl %ebp,%edx movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 16(%rsp),%ebp pxor %xmm5,%xmm1 .byte 102,68,15,58,15,215,8 - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax pxor %xmm2,%xmm1 - addl %esi,%ebp - xorl %ecx,%edi + xorl %ecx,%esi + addl %eax,%ebp movdqa %xmm8,%xmm9 paddd %xmm0,%xmm8 rorl $7,%ebx - addl %eax,%ebp + addl %esi,%ebp pxor %xmm10,%xmm1 addl 20(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm1,%xmm10 movdqa %xmm8,0(%rsp) - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm1 addl 24(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi psrld $30,%xmm10 movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx por %xmm10,%xmm1 addl 28(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movdqa %xmm1,%xmm8 movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 32(%rsp),%eax pxor %xmm6,%xmm2 .byte 102,68,15,58,15,192,8 - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx pxor %xmm3,%xmm2 - addl %esi,%eax - xorl %edx,%edi - movdqa 0(%r11),%xmm10 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 paddd %xmm1,%xmm9 rorl $7,%ecx - addl %ebx,%eax + addl %esi,%eax pxor %xmm8,%xmm2 addl 36(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm8 movdqa %xmm9,16(%rsp) - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp pslld $2,%xmm2 addl 40(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi psrld $30,%xmm8 movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx por %xmm8,%xmm2 addl 44(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movdqa %xmm2,%xmm9 movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 48(%rsp),%ebx pxor %xmm7,%xmm3 .byte 102,68,15,58,15,201,8 - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx pxor %xmm4,%xmm3 - addl %esi,%ebx - xorl %ebp,%edi + xorl %ebp,%esi + addl %ecx,%ebx movdqa %xmm10,%xmm8 paddd %xmm2,%xmm10 rorl $7,%edx - addl %ecx,%ebx + addl %esi,%ebx pxor %xmm9,%xmm3 addl 52(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm9 movdqa %xmm10,32(%rsp) - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax pslld $2,%xmm3 addl 56(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi psrld $30,%xmm9 movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp por %xmm9,%xmm3 addl 60(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movdqa %xmm3,%xmm10 movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 0(%rsp),%ecx pxor %xmm0,%xmm4 .byte 102,68,15,58,15,210,8 - xorl %eax,%esi + xorl %ebx,%esi movl %edx,%edi roll $5,%edx pxor %xmm5,%xmm4 - addl %esi,%ecx - xorl %eax,%edi + xorl %eax,%esi + addl %edx,%ecx movdqa %xmm8,%xmm9 paddd %xmm3,%xmm8 rorl $7,%ebp - addl %edx,%ecx + addl %esi,%ecx pxor %xmm10,%xmm4 addl 4(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm10 movdqa %xmm8,48(%rsp) - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx pslld $2,%xmm4 addl 8(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi psrld $30,%xmm10 movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax por %xmm10,%xmm4 addl 12(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movdqa %xmm4,%xmm8 movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 16(%rsp),%edx pxor %xmm1,%xmm5 .byte 102,68,15,58,15,195,8 - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp pxor %xmm6,%xmm5 - addl %esi,%edx - xorl %ebx,%edi + xorl %ebx,%esi + addl %ebp,%edx movdqa %xmm9,%xmm10 paddd %xmm4,%xmm9 rorl $7,%eax - addl %ebp,%edx + addl %esi,%edx pxor %xmm8,%xmm5 addl 20(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm8 movdqa %xmm9,0(%rsp) - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx pslld $2,%xmm5 addl 24(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi psrld $30,%xmm8 movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx por %xmm8,%xmm5 addl 28(%rsp),%eax + xorl %ebp,%edi movdqa %xmm5,%xmm9 - rorl $7,%ecx movl %ebx,%esi - xorl %edx,%edi roll $5,%ebx - addl %edi,%eax - xorl %ecx,%esi - xorl %edx,%ecx + xorl %edx,%edi addl %ebx,%eax - addl 32(%rsp),%ebp + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi pxor %xmm2,%xmm6 .byte 102,68,15,58,15,204,8 - andl %ecx,%esi xorl %edx,%ecx - rorl $7,%ebx + addl 32(%rsp),%ebp + andl %edx,%edi pxor %xmm7,%xmm6 - movl %eax,%edi - xorl %ecx,%esi + andl %ecx,%esi + rorl $7,%ebx movdqa %xmm10,%xmm8 paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 roll $5,%eax addl %esi,%ebp - pxor %xmm9,%xmm6 - xorl %ebx,%edi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 36(%rsp),%edx movdqa %xmm6,%xmm9 movdqa %xmm10,16(%rsp) - andl %ebx,%edi + movl %ebx,%esi xorl %ecx,%ebx - rorl $7,%eax - movl %ebp,%esi + addl 36(%rsp),%edx + andl %ecx,%esi pslld $2,%xmm6 - xorl %ebx,%edi + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi roll $5,%ebp - psrld $30,%xmm9 addl %edi,%edx - xorl %eax,%esi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 40(%rsp),%ecx - andl %eax,%esi por %xmm9,%xmm6 + movl %eax,%edi xorl %ebx,%eax - rorl $7,%ebp movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx movl %edx,%edi - xorl %eax,%esi roll $5,%edx addl %esi,%ecx - xorl %ebp,%edi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp addl 44(%rsp),%ebx + andl %eax,%esi andl %ebp,%edi - xorl %eax,%ebp rorl $7,%edx + addl %esi,%ebx movl %ecx,%esi - xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %edx,%esi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 48(%rsp),%eax + movl %edx,%edi pxor %xmm3,%xmm7 .byte 102,68,15,58,15,213,8 - andl %edx,%esi xorl %ebp,%edx - rorl $7,%ecx + addl 48(%rsp),%eax + andl %ebp,%edi pxor %xmm0,%xmm7 - movl %ebx,%edi - xorl %edx,%esi - movdqa 32(%r11),%xmm9 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 roll $5,%ebx addl %esi,%eax - pxor %xmm10,%xmm7 - xorl %ecx,%edi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax - addl 52(%rsp),%ebp movdqa %xmm7,%xmm10 movdqa %xmm8,32(%rsp) - andl %ecx,%edi + movl %ecx,%esi xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp movl %eax,%esi - pslld $2,%xmm7 - xorl %ecx,%edi roll $5,%eax - psrld $30,%xmm10 addl %edi,%ebp - xorl %ebx,%esi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 56(%rsp),%edx - andl %ebx,%esi por %xmm10,%xmm7 + movl %ebx,%edi xorl %ecx,%ebx - rorl $7,%eax movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx movl %ebp,%edi - xorl %ebx,%esi roll $5,%ebp addl %esi,%edx - xorl %eax,%edi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax addl 60(%rsp),%ecx + andl %ebx,%esi andl %eax,%edi - xorl %ebx,%eax rorl $7,%ebp + addl %esi,%ecx movl %edx,%esi - xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebp,%esi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 0(%rsp),%ebx + movl %ebp,%edi pxor %xmm4,%xmm0 .byte 102,68,15,58,15,198,8 - andl %ebp,%esi xorl %eax,%ebp - rorl $7,%edx + addl 0(%rsp),%ebx + andl %eax,%edi pxor %xmm1,%xmm0 - movl %ecx,%edi - xorl %ebp,%esi + andl %ebp,%esi + rorl $7,%edx movdqa %xmm9,%xmm10 paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 roll $5,%ecx addl %esi,%ebx - pxor %xmm8,%xmm0 - xorl %edx,%edi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 4(%rsp),%eax movdqa %xmm0,%xmm8 movdqa %xmm9,48(%rsp) - andl %edx,%edi + movl %edx,%esi xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax movl %ebx,%esi - pslld $2,%xmm0 - xorl %edx,%edi roll $5,%ebx - psrld $30,%xmm8 addl %edi,%eax - xorl %ecx,%esi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax - addl 8(%rsp),%ebp - andl %ecx,%esi por %xmm8,%xmm0 + movl %ecx,%edi xorl %edx,%ecx - rorl $7,%ebx movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp movl %eax,%edi - xorl %ecx,%esi roll $5,%eax addl %esi,%ebp - xorl %ebx,%edi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx addl 12(%rsp),%edx + andl %ecx,%esi andl %ebx,%edi - xorl %ecx,%ebx rorl $7,%eax + addl %esi,%edx movl %ebp,%esi - xorl %ebx,%edi roll $5,%ebp addl %edi,%edx - xorl %eax,%esi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 16(%rsp),%ecx + movl %eax,%edi pxor %xmm5,%xmm1 .byte 102,68,15,58,15,207,8 - andl %eax,%esi xorl %ebx,%eax - rorl $7,%ebp + addl 16(%rsp),%ecx + andl %ebx,%edi pxor %xmm2,%xmm1 - movl %edx,%edi - xorl %eax,%esi + andl %eax,%esi + rorl $7,%ebp movdqa %xmm10,%xmm8 paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 roll $5,%edx addl %esi,%ecx - pxor %xmm9,%xmm1 - xorl %ebp,%edi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 20(%rsp),%ebx movdqa %xmm1,%xmm9 movdqa %xmm10,0(%rsp) - andl %ebp,%edi + movl %ebp,%esi xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx movl %ecx,%esi - pslld $2,%xmm1 - xorl %ebp,%edi roll $5,%ecx - psrld $30,%xmm9 addl %edi,%ebx - xorl %edx,%esi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 24(%rsp),%eax - andl %edx,%esi por %xmm9,%xmm1 + movl %edx,%edi xorl %ebp,%edx - rorl $7,%ecx movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax movl %ebx,%edi - xorl %edx,%esi roll $5,%ebx addl %esi,%eax - xorl %ecx,%edi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx addl 28(%rsp),%ebp + andl %edx,%esi andl %ecx,%edi - xorl %edx,%ecx rorl $7,%ebx + addl %esi,%ebp movl %eax,%esi - xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %ebx,%esi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 32(%rsp),%edx + movl %ebx,%edi pxor %xmm6,%xmm2 .byte 102,68,15,58,15,208,8 - andl %ebx,%esi xorl %ecx,%ebx - rorl $7,%eax + addl 32(%rsp),%edx + andl %ecx,%edi pxor %xmm3,%xmm2 - movl %ebp,%edi - xorl %ebx,%esi + andl %ebx,%esi + rorl $7,%eax movdqa %xmm8,%xmm9 paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 roll $5,%ebp addl %esi,%edx - pxor %xmm10,%xmm2 - xorl %eax,%edi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 36(%rsp),%ecx movdqa %xmm2,%xmm10 movdqa %xmm8,16(%rsp) - andl %eax,%edi + movl %eax,%esi xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx movl %edx,%esi - pslld $2,%xmm2 - xorl %eax,%edi roll $5,%edx - psrld $30,%xmm10 addl %edi,%ecx - xorl %ebp,%esi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 40(%rsp),%ebx - andl %ebp,%esi por %xmm10,%xmm2 + movl %ebp,%edi xorl %eax,%ebp - rorl $7,%edx movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx movl %ecx,%edi - xorl %ebp,%esi roll $5,%ecx addl %esi,%ebx - xorl %edx,%edi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx addl 44(%rsp),%eax + andl %ebp,%esi andl %edx,%edi - xorl %ebp,%edx rorl $7,%ecx + addl %esi,%eax movl %ebx,%esi - xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %edx,%esi + xorl %ebp,%edx addl %ebx,%eax addl 48(%rsp),%ebp pxor %xmm7,%xmm3 .byte 102,68,15,58,15,193,8 - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax pxor %xmm4,%xmm3 - addl %esi,%ebp - xorl %ecx,%edi + xorl %ecx,%esi + addl %eax,%ebp movdqa %xmm9,%xmm10 paddd %xmm2,%xmm9 rorl $7,%ebx - addl %eax,%ebp + addl %esi,%ebp pxor %xmm8,%xmm3 addl 52(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm3,%xmm8 - movdqa %xmm9,32(%rsp) - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm3 addl 56(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi psrld $30,%xmm8 movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx por %xmm8,%xmm3 addl 60(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 0(%rsp),%eax paddd %xmm3,%xmm10 - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax + xorl %edx,%esi movdqa %xmm10,48(%rsp) - xorl %edx,%edi - rorl $7,%ecx addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 4(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 8(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx addl 12(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx cmpq %r10,%r9 je .Ldone_ssse3 movdqa 64(%r11),%xmm6 - movdqa -64(%r11),%xmm9 + movdqa 0(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -2285,112 +2302,113 @@ _ssse3_shortcut: .byte 102,15,56,0,198 addq $64,%r9 addl 16(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi .byte 102,15,56,0,206 movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx paddd %xmm9,%xmm0 - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx - addl 20(%rsp),%eax + rorl $7,%edx + addl %esi,%ebx movdqa %xmm0,0(%rsp) - xorl %edx,%edi - movl %ebx,%esi + addl 20(%rsp),%eax + xorl %ebp,%edi psubd %xmm9,%xmm0 + movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 24(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp addl 28(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 32(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi .byte 102,15,56,0,214 movl %edx,%edi roll $5,%edx - addl %esi,%ecx paddd %xmm9,%xmm1 - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx - addl 36(%rsp),%ebx + rorl $7,%ebp + addl %esi,%ecx movdqa %xmm1,16(%rsp) - xorl %ebp,%edi - movl %ecx,%esi + addl 36(%rsp),%ebx + xorl %eax,%edi psubd %xmm9,%xmm1 + movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 40(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 44(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 48(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi .byte 102,15,56,0,222 movl %ebp,%edi roll $5,%ebp - addl %esi,%edx paddd %xmm9,%xmm2 - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx - addl 52(%rsp),%ecx + rorl $7,%eax + addl %esi,%edx movdqa %xmm2,32(%rsp) - xorl %eax,%edi - movl %edx,%esi + addl 52(%rsp),%ecx + xorl %ebx,%edi psubd %xmm9,%xmm2 + movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 56(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 60(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2400,110 +2418,108 @@ _ssse3_shortcut: movl %esi,4(%r8) movl %esi,%ebx movl %ecx,8(%r8) - movl %ecx,%edi movl %edx,12(%r8) - xorl %edx,%edi movl %ebp,16(%r8) - andl %edi,%esi jmp .Loop_ssse3 .p2align 4 .Ldone_ssse3: addl 16(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 20(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 24(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp addl 28(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 32(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx addl 36(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 40(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 44(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 48(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx addl 52(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 56(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 60(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2519,8 +2535,7 @@ _ssse3_shortcut: movaps 64+32(%rsp),%xmm8 movaps 64+48(%rsp),%xmm9 movaps 64+64(%rsp),%xmm10 - movaps 64+80(%rsp),%xmm11 - leaq 160(%rsp),%rsi + leaq 144(%rsp),%rsi movq 0(%rsi),%r12 movq 8(%rsi),%rbp movq 16(%rsi),%rbx @@ -2532,16 +2547,11 @@ _ssse3_shortcut: .LSEH_end_sha1_block_data_order_ssse3: .p2align 6 K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 @@ -2621,9 +2631,9 @@ ssse3_handler: leaq 64(%rax),%rsi leaq 512(%r8),%rdi - movl $12,%ecx -.long 0xa548f3fc - leaq 184(%rax),%rax + movl $10,%ecx +.long 0xa548f3fc + leaq 168(%rax),%rax movq -8(%rax),%rbx movq -16(%rax),%rbp @@ -2642,7 +2652,7 @@ ssse3_handler: movq 40(%r9),%rdi movq %r8,%rsi movl $154,%ecx -.long 0xa548f3fc +.long 0xa548f3fc movq %r9,%rsi xorq %rcx,%rcx @@ -2688,6 +2698,5 @@ ssse3_handler: .LSEH_info_sha1_block_data_order_ssse3: .byte 9,0,0,0 .rva ssse3_handler -.rva .Lprologue_ssse3,.Lepilogue_ssse3 +.rva .Lprologue_ssse3,.Lepilogue_ssse3 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha256-ssse3-x86.s b/lib/accelerated/x86/coff/sha256-ssse3-x86.s index 117b2bd413..61d6eaccf9 100644 --- a/lib/accelerated/x86/coff/sha256-ssse3-x86.s +++ b/lib/accelerated/x86/coff/sha256-ssse3-x86.s @@ -64,405 +64,195 @@ _sha256_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - leal __gnutls_x86_cpuid_s-.L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz .L002loop - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je .L003loop_shrd - subl %edi,%eax - cmpl $256,%eax - jae .L004unrolled - jmp .L002loop .align 16 .L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx - bswap %eax movl 12(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx - bswap %eax movl 28(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx - bswap %eax movl 44(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx - bswap %eax movl 60(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx addl $64,%edi - leal -36(%esp),%esp - movl %edi,104(%esp) + subl $32,%esp + movl %edi,100(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi - movl %ebx,8(%esp) - xorl %ecx,%ebx - movl %ecx,12(%esp) - movl %edi,16(%esp) - movl %ebx,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edi,12(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - movl %edi,32(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edi,28(%esp) .align 16 -.L00500_15: +.L00300_15: + movl 92(%esp),%ebx movl %edx,%ecx - movl 24(%esp),%esi rorl $14,%ecx - movl 28(%esp),%edi + movl 20(%esp),%esi xorl %edx,%ecx - xorl %edi,%esi - movl 96(%esp),%ebx rorl $5,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx xorl %edi,%esi - rorl $6,%edx + movl %edx,16(%esp) movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi addl %esi,%ebx rorl $9,%ecx - addl %edx,%ebx - movl 8(%esp),%edi + addl 28(%esp),%ebx xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp rorl $11,%ecx - movl (%ebp),%esi + movl 4(%esp),%esi xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax rorl $2,%ecx - addl %esi,%ebx - movl %eax,(%esp) addl %ebx,%edx - andl 4(%esp),%eax + movl 8(%esp),%edi addl %ecx,%ebx - xorl %edi,%eax + movl %eax,(%esp) + movl %eax,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax + movl (%ebp),%esi + orl %ecx,%eax addl $4,%ebp addl %ebx,%eax + addl %esi,%edx + addl %esi,%eax cmpl $3248222580,%esi - jne .L00500_15 - movl 156(%esp),%ecx - jmp .L00616_63 + jne .L00300_15 + movl 152(%esp),%ebx .align 16 -.L00616_63: - movl %ecx,%ebx - movl 104(%esp),%esi - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx +.L00416_63: + movl %ebx,%esi + movl 100(%esp),%ecx + rorl $11,%esi + movl %ecx,%edi + xorl %ebx,%esi + rorl $7,%esi shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 160(%esp),%ebx - shrl $10,%edi - addl 124(%esp),%ebx + rorl $2,%edi + xorl %esi,%ebx + xorl %ecx,%edi + rorl $17,%edi + shrl $10,%ecx + addl 156(%esp),%ebx + xorl %ecx,%edi + addl 120(%esp),%ebx movl %edx,%ecx - xorl %esi,%edi - movl 24(%esp),%esi - rorl $14,%ecx addl %edi,%ebx - movl 28(%esp),%edi + rorl $14,%ecx + movl 20(%esp),%esi xorl %edx,%ecx - xorl %edi,%esi - movl %ebx,96(%esp) rorl $5,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx + movl %ebx,92(%esp) + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx xorl %edi,%esi - rorl $6,%edx + movl %edx,16(%esp) movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi addl %esi,%ebx rorl $9,%ecx - addl %edx,%ebx - movl 8(%esp),%edi + addl 28(%esp),%ebx xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp rorl $11,%ecx - movl (%ebp),%esi + movl 4(%esp),%esi xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax rorl $2,%ecx - addl %esi,%ebx - movl %eax,(%esp) addl %ebx,%edx - andl 4(%esp),%eax - addl %ecx,%ebx - xorl %edi,%eax - movl 156(%esp),%ecx - addl $4,%ebp - addl %ebx,%eax - cmpl $3329325298,%esi - jne .L00616_63 - movl 356(%esp),%esi - movl 8(%esp),%ebx - movl 16(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl 24(%esp),%eax - movl 28(%esp),%ebx - movl 32(%esp),%ecx - movl 360(%esp),%edi - addl 16(%esi),%edx - addl 20(%esi),%eax - addl 24(%esi),%ebx - addl 28(%esi),%ecx - movl %edx,16(%esi) - movl %eax,20(%esi) - movl %ebx,24(%esi) - movl %ecx,28(%esi) - leal 356(%esp),%esp - subl $256,%ebp - cmpl 8(%esp),%edi - jb .L002loop - movl 12(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 32 -.L003loop_shrd: - movl (%edi),%eax - movl 4(%edi),%ebx - movl 8(%edi),%ecx - bswap %eax - movl 12(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 16(%edi),%eax - movl 20(%edi),%ebx - movl 24(%edi),%ecx - bswap %eax - movl 28(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 32(%edi),%eax - movl 36(%edi),%ebx - movl 40(%edi),%ecx - bswap %eax - movl 44(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 48(%edi),%eax - movl 52(%edi),%ebx - movl 56(%edi),%ecx - bswap %eax - movl 60(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - addl $64,%edi - leal -36(%esp),%esp - movl %edi,104(%esp) - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,8(%esp) - xorl %ecx,%ebx - movl %ecx,12(%esp) - movl %edi,16(%esp) - movl %ebx,(%esp) - movl 16(%esi),%edx - movl 20(%esi),%ebx - movl 24(%esi),%ecx - movl 28(%esi),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - movl %edi,32(%esp) -.align 16 -.L00700_15_shrd: - movl %edx,%ecx - movl 24(%esp),%esi - shrdl $14,%ecx,%ecx - movl 28(%esp),%edi - xorl %edx,%ecx - xorl %edi,%esi - movl 96(%esp),%ebx - shrdl $5,%ecx,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx - xorl %edi,%esi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %esi,%ebx - shrdl $9,%ecx,%ecx - addl %edx,%ebx movl 8(%esp),%edi - xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp - shrdl $11,%ecx,%ecx - movl (%ebp),%esi - xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %esi,%ebx - movl %eax,(%esp) - addl %ebx,%edx - andl 4(%esp),%eax addl %ecx,%ebx - xorl %edi,%eax - addl $4,%ebp - addl %ebx,%eax - cmpl $3248222580,%esi - jne .L00700_15_shrd - movl 156(%esp),%ecx - jmp .L00816_63_shrd -.align 16 -.L00816_63_shrd: - movl %ecx,%ebx - movl 104(%esp),%esi - shrdl $11,%ecx,%ecx - movl %esi,%edi - shrdl $2,%esi,%esi - xorl %ebx,%ecx - shrl $3,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - shrdl $17,%esi,%esi - addl 160(%esp),%ebx - shrl $10,%edi - addl 124(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 24(%esp),%esi - shrdl $14,%ecx,%ecx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %edx,%ecx - xorl %edi,%esi - movl %ebx,96(%esp) - shrdl $5,%ecx,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx - xorl %edi,%esi - shrdl $6,%edx,%edx + movl %eax,(%esp) movl %eax,%ecx - addl %esi,%ebx - shrdl $9,%ecx,%ecx - addl %edx,%ebx - movl 8(%esp),%edi - xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp - shrdl $11,%ecx,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax movl (%ebp),%esi - xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %esi,%ebx - movl %eax,(%esp) - addl %ebx,%edx - andl 4(%esp),%eax - addl %ecx,%ebx - xorl %edi,%eax - movl 156(%esp),%ecx + orl %ecx,%eax addl $4,%ebp addl %ebx,%eax + movl 152(%esp),%ebx + addl %esi,%edx + addl %esi,%eax cmpl $3329325298,%esi - jne .L00816_63_shrd - movl 356(%esp),%esi - movl 8(%esp),%ebx - movl 16(%esp),%ecx + jne .L00416_63 + movl 352(%esp),%esi + movl 4(%esp),%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edi addl (%esi),%eax addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx + addl 8(%esi),%ecx + addl 12(%esi),%edi movl %eax,(%esi) movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl 24(%esp),%eax - movl 28(%esp),%ebx - movl 32(%esp),%ecx - movl 360(%esp),%edi + movl %ecx,8(%esi) + movl %edi,12(%esi) + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 356(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx @@ -471,10 +261,10 @@ _sha256_block_data_order: movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) - leal 356(%esp),%esp + addl $352,%esp subl $256,%ebp cmpl 8(%esp),%edi - jb .L003loop_shrd + jb .L002loop movl 12(%esp),%esp popl %edi popl %esi @@ -483,2920 +273,25 @@ _sha256_block_data_order: ret .align 64 .L001K256: -.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 -.long 66051,67438087,134810123,202182159 +.long 1116352408,1899447441,3049323471,3921009573 +.long 961987163,1508970993,2453635748,2870763221 +.long 3624381080,310598401,607225278,1426881987 +.long 1925078388,2162078206,2614888103,3248222580 +.long 3835390401,4022224774,264347078,604807628 +.long 770255983,1249150122,1555081692,1996064986 +.long 2554220882,2821834349,2952996808,3210313671 +.long 3336571891,3584528711,113926993,338241895 +.long 666307205,773529912,1294757372,1396182291 +.long 1695183700,1986661051,2177026350,2456956037 +.long 2730485921,2820302411,3259730800,3345764771 +.long 3516065817,3600352804,4094571909,275423344 +.long 430227734,506948616,659060556,883997877 +.long 958139571,1322822218,1537002063,1747873779 +.long 1955562222,2024104815,2227730452,2361852424 +.long 2428436474,2756734187,3204031479,3329325298 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.align 16 -.L004unrolled: - leal -96(%esp),%esp - movl (%esi),%eax - movl 4(%esi),%ebp - movl 8(%esi),%ecx - movl 12(%esi),%ebx - movl %ebp,4(%esp) - xorl %ecx,%ebp - movl %ecx,8(%esp) - movl %ebx,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%ebx - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %ebx,20(%esp) - movl %ecx,24(%esp) - movl %esi,28(%esp) - jmp .L009grand_loop -.align 16 -.L009grand_loop: - movl (%edi),%ebx - movl 4(%edi),%ecx - bswap %ebx - movl 8(%edi),%esi - bswap %ecx - movl %ebx,32(%esp) - bswap %esi - movl %ecx,36(%esp) - movl %esi,40(%esp) - movl 12(%edi),%ebx - movl 16(%edi),%ecx - bswap %ebx - movl 20(%edi),%esi - bswap %ecx - movl %ebx,44(%esp) - bswap %esi - movl %ecx,48(%esp) - movl %esi,52(%esp) - movl 24(%edi),%ebx - movl 28(%edi),%ecx - bswap %ebx - movl 32(%edi),%esi - bswap %ecx - movl %ebx,56(%esp) - bswap %esi - movl %ecx,60(%esp) - movl %esi,64(%esp) - movl 36(%edi),%ebx - movl 40(%edi),%ecx - bswap %ebx - movl 44(%edi),%esi - bswap %ecx - movl %ebx,68(%esp) - bswap %esi - movl %ecx,72(%esp) - movl %esi,76(%esp) - movl 48(%edi),%ebx - movl 52(%edi),%ecx - bswap %ebx - movl 56(%edi),%esi - bswap %ecx - movl %ebx,80(%esp) - bswap %esi - movl %ecx,84(%esp) - movl %esi,88(%esp) - movl 60(%edi),%ebx - addl $64,%edi - bswap %ebx - movl %edi,100(%esp) - movl %ebx,92(%esp) - movl %edx,%ecx - movl 20(%esp),%esi - rorl $14,%edx - movl 24(%esp),%edi - xorl %ecx,%edx - movl 32(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1116352408(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 16(%esp),%ecx - rorl $14,%edx - movl 20(%esp),%edi - xorl %esi,%edx - movl 36(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1899447441(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 12(%esp),%esi - rorl $14,%edx - movl 16(%esp),%edi - xorl %ecx,%edx - movl 40(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3049323471(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 8(%esp),%ecx - rorl $14,%edx - movl 12(%esp),%edi - xorl %esi,%edx - movl 44(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3921009573(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 4(%esp),%esi - rorl $14,%edx - movl 8(%esp),%edi - xorl %ecx,%edx - movl 48(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 961987163(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl (%esp),%ecx - rorl $14,%edx - movl 4(%esp),%edi - xorl %esi,%edx - movl 52(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1508970993(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 28(%esp),%esi - rorl $14,%edx - movl (%esp),%edi - xorl %ecx,%edx - movl 56(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2453635748(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 24(%esp),%ecx - rorl $14,%edx - movl 28(%esp),%edi - xorl %esi,%edx - movl 60(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2870763221(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 20(%esp),%esi - rorl $14,%edx - movl 24(%esp),%edi - xorl %ecx,%edx - movl 64(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3624381080(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 16(%esp),%ecx - rorl $14,%edx - movl 20(%esp),%edi - xorl %esi,%edx - movl 68(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 310598401(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 12(%esp),%esi - rorl $14,%edx - movl 16(%esp),%edi - xorl %ecx,%edx - movl 72(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 607225278(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 8(%esp),%ecx - rorl $14,%edx - movl 12(%esp),%edi - xorl %esi,%edx - movl 76(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1426881987(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 4(%esp),%esi - rorl $14,%edx - movl 8(%esp),%edi - xorl %ecx,%edx - movl 80(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1925078388(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl (%esp),%ecx - rorl $14,%edx - movl 4(%esp),%edi - xorl %esi,%edx - movl 84(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2162078206(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 28(%esp),%esi - rorl $14,%edx - movl (%esp),%edi - xorl %ecx,%edx - movl 88(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2614888103(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 24(%esp),%ecx - rorl $14,%edx - movl 28(%esp),%edi - xorl %esi,%edx - movl 92(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3248222580(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3835390401(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 4022224774(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 264347078(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 604807628(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 770255983(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1249150122(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1555081692(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1996064986(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2554220882(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2821834349(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2952996808(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3210313671(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3336571891(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3584528711(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,88(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 113926993(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,92(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 338241895(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 666307205(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 773529912(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1294757372(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1396182291(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1695183700(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1986661051(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2177026350(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2456956037(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2730485921(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2820302411(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3259730800(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3345764771(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3516065817(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3600352804(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,88(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 4094571909(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,92(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 275423344(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 430227734(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 506948616(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 659060556(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 883997877(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 958139571(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1322822218(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1537002063(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1747873779(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1955562222(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2024104815(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2227730452(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2361852424(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2428436474(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2756734187(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3204031479(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3329325298(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 96(%esp),%esi - xorl %edi,%ebp - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebp - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebp,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebp,4(%esp) - xorl %edi,%ebp - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ebx - movl 28(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ebx - addl 28(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %ebx,24(%esi) - movl %ecx,28(%esi) - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - cmpl 104(%esp),%edi - jb .L009grand_loop - movl 108(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.comm __gnutls_x86_cpuid_s,16 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86.s b/lib/accelerated/x86/coff/sha512-ssse3-x86.s index d68eeffb4a..acad0ec1e7 100644 --- a/lib/accelerated/x86/coff/sha512-ssse3-x86.s +++ b/lib/accelerated/x86/coff/sha512-ssse3-x86.s @@ -594,12 +594,9 @@ _sha512_block_data_order: .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 -.long 67438087,66051 -.long 202182159,134810123 .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s index 1f1b40a345..2b6cd0fcd3 100644 --- a/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s +++ b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s @@ -39,7 +39,6 @@ # .text - .globl sha256_block_data_order .def sha256_block_data_order; .scl 2; .type 32; .endef .p2align 4 @@ -51,13 +50,8 @@ sha256_block_data_order: movq %rcx,%rdi movq %rdx,%rsi movq %r8,%rdx + movq %r9,%rcx - leaq _gnutls_x86_cpuid_s(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - testl $512,%r10d - jnz .Lssse3_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -75,6 +69,8 @@ sha256_block_data_order: movq %r11,64+24(%rsp) .Lprologue: + leaq K256(%rip),%rbp + movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx @@ -87,2787 +83,1697 @@ sha256_block_data_order: .p2align 4 .Lloop: - movl %ebx,%edi - leaq K256(%rip),%rbp - xorl %ecx,%edi + xorq %rdi,%rdi movl 0(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r11d + movl 4(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,4(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r10d + movl 8(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r9d + movl 12(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,12(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r8d + movl 16(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%edx + movl 20(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,20(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ecx + movl 24(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ebx + movl 28(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,28(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%eax + movl 32(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r11d + movl 36(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,36(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r10d + movl 40(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r9d + movl 44(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,44(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r8d + movl 48(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%edx + movl 52(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,52(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ecx + movl 56(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ebx + movl 60(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,60(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp jmp .Lrounds_16_xx .p2align 4 .Lrounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r15d - + movl 56(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%eax - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 36(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 0(%rsp),%r12d movl %r8d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d - leaq 4(%rbp),%rbp movl 8(%rsp),%r13d - movl 60(%rsp),%edi - + movl 60(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r11d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 40(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 4(%rsp),%r12d movl %edx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,4(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d - leaq 4(%rbp),%rbp movl 12(%rsp),%r13d - movl 0(%rsp),%r15d - + movl 0(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r10d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 44(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 8(%rsp),%r12d movl %ecx,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d - leaq 4(%rbp),%rbp movl 16(%rsp),%r13d - movl 4(%rsp),%edi - + movl 4(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r9d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 48(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 12(%rsp),%r12d movl %ebx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,12(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d - leaq 20(%rbp),%rbp movl 20(%rsp),%r13d - movl 8(%rsp),%r15d - + movl 8(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r8d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 52(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 16(%rsp),%r12d movl %eax,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx - leaq 4(%rbp),%rbp movl 24(%rsp),%r13d - movl 12(%rsp),%edi - + movl 12(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%edx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 56(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 20(%rsp),%r12d movl %r11d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,20(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx - leaq 4(%rbp),%rbp movl 28(%rsp),%r13d - movl 16(%rsp),%r15d - + movl 16(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ecx - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%r15d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 60(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 24(%rsp),%r12d movl %r10d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx - leaq 4(%rbp),%rbp movl 32(%rsp),%r13d - movl 20(%rsp),%edi - + movl 20(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ebx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 0(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 28(%rsp),%r12d movl %r9d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,28(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp movl 36(%rsp),%r13d - movl 24(%rsp),%r15d - + movl 24(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%eax - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 4(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 32(%rsp),%r12d movl %r8d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d - leaq 4(%rbp),%rbp movl 40(%rsp),%r13d - movl 28(%rsp),%edi - + movl 28(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r11d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 8(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 36(%rsp),%r12d movl %edx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,36(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d - leaq 4(%rbp),%rbp movl 44(%rsp),%r13d - movl 32(%rsp),%r15d - + movl 32(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r10d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 12(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 40(%rsp),%r12d movl %ecx,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d - leaq 4(%rbp),%rbp movl 48(%rsp),%r13d - movl 36(%rsp),%edi - + movl 36(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r9d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 16(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 44(%rsp),%r12d movl %ebx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,44(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d - leaq 20(%rbp),%rbp movl 52(%rsp),%r13d - movl 40(%rsp),%r15d - + movl 40(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r8d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 20(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 48(%rsp),%r12d movl %eax,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx - leaq 4(%rbp),%rbp movl 56(%rsp),%r13d - movl 44(%rsp),%edi - + movl 44(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%edx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 24(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 52(%rsp),%r12d movl %r11d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,52(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx - leaq 4(%rbp),%rbp movl 60(%rsp),%r13d - movl 48(%rsp),%r15d - + movl 48(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ecx - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 28(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 56(%rsp),%r12d movl %r10d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - - rorl $5,%r13d - addl %ebx,%r12d - xorl %eax,%r15d - - rorl $11,%r14d - xorl %r10d,%r13d - addl %r15d,%r12d - - movl %ecx,%r15d - addl (%rbp),%r12d - xorl %ecx,%r14d - - xorl %edx,%r15d - rorl $6,%r13d - movl %edx,%ebx - - andl %r15d,%edi - rorl $2,%r14d - addl %r13d,%r12d - - xorl %edi,%ebx - addl %r12d,%r9d - addl %r12d,%ebx - - leaq 4(%rbp),%rbp - movl 0(%rsp),%r13d - movl 52(%rsp),%edi - - movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ebx - movl %edi,%r14d - rorl $2,%edi - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi - shrl $10,%r14d - - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 32(%rsp),%r12d - - addl 60(%rsp),%r12d - movl %r9d,%r13d - addl %edi,%r12d - movl %ebx,%r14d - rorl $14,%r13d - movl %r10d,%edi - - xorl %r9d,%r13d - rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi - - rorl $5,%r13d - addl %eax,%r12d - xorl %r11d,%edi - - rorl $11,%r14d - xorl %r9d,%r13d - addl %edi,%r12d - - movl %ebx,%edi - addl (%rbp),%r12d - xorl %ebx,%r14d - - xorl %ecx,%edi - rorl $6,%r13d - movl %ecx,%eax - - andl %edi,%r15d - rorl $2,%r14d - addl %r13d,%r12d - - xorl %r15d,%eax - addl %r12d,%r8d - addl %r12d,%eax - - leaq 20(%rbp),%rbp - cmpb $0,3(%rbp) - jnz .Lrounds_16_xx - - movq 64+0(%rsp),%rdi - addl %r14d,%eax - leaq 64(%rsi),%rsi - - addl 0(%rdi),%eax - addl 4(%rdi),%ebx - addl 8(%rdi),%ecx - addl 12(%rdi),%edx - addl 16(%rdi),%r8d - addl 20(%rdi),%r9d - addl 24(%rdi),%r10d - addl 28(%rdi),%r11d - - cmpq 64+16(%rsp),%rsi - - movl %eax,0(%rdi) - movl %ebx,4(%rdi) - movl %ecx,8(%rdi) - movl %edx,12(%rdi) - movl %r8d,16(%rdi) - movl %r9d,20(%rdi) - movl %r10d,24(%rdi) - movl %r11d,28(%rdi) - jb .Lloop - - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -.Lepilogue: - movq 8(%rsp),%rdi - movq 16(%rsp),%rsi - .byte 0xf3,0xc3 -.LSEH_end_sha256_block_data_order: -.p2align 6 - -K256: -.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 - -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.def sha256_block_data_order_ssse3; .scl 3; .type 32; .endef -.p2align 6 -sha256_block_data_order_ssse3: - movq %rdi,8(%rsp) - movq %rsi,16(%rsp) - movq %rsp,%rax -.LSEH_begin_sha256_block_data_order_ssse3: - movq %rcx,%rdi - movq %rdx,%rsi - movq %r8,%rdx - -.Lssse3_shortcut: - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - movq %rsp,%r11 - shlq $4,%rdx - subq $160,%rsp - leaq (%rsi,%rdx,4),%rdx - andq $-64,%rsp - movq %rdi,64+0(%rsp) - movq %rsi,64+8(%rsp) - movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) - movaps %xmm6,64+32(%rsp) - movaps %xmm7,64+48(%rsp) - movaps %xmm8,64+64(%rsp) - movaps %xmm9,64+80(%rsp) -.Lprologue_ssse3: - - movl 0(%rdi),%eax - movl 4(%rdi),%ebx - movl 8(%rdi),%ecx - movl 12(%rdi),%edx - movl 16(%rdi),%r8d - movl 20(%rdi),%r9d - movl 24(%rdi),%r10d - movl 28(%rdi),%r11d - - - jmp .Lloop_ssse3 -.p2align 4 -.Lloop_ssse3: - movdqa K256+512(%rip),%xmm7 - movdqu 0(%rsi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqu 32(%rsi),%xmm2 - movdqu 48(%rsi),%xmm3 -.byte 102,15,56,0,199 - leaq K256(%rip),%rbp -.byte 102,15,56,0,207 - movdqa 0(%rbp),%xmm4 -.byte 102,15,56,0,215 - movdqa 32(%rbp),%xmm5 - paddd %xmm0,%xmm4 - movdqa 64(%rbp),%xmm6 -.byte 102,15,56,0,223 - movdqa 96(%rbp),%xmm7 - paddd %xmm1,%xmm5 - paddd %xmm2,%xmm6 - paddd %xmm3,%xmm7 - movdqa %xmm4,0(%rsp) - movl %eax,%r14d - movdqa %xmm5,16(%rsp) - movl %ebx,%edi - movdqa %xmm6,32(%rsp) - xorl %ecx,%edi - movdqa %xmm7,48(%rsp) - movl %r8d,%r13d - jmp .Lssse3_00_47 - -.p2align 4 -.Lssse3_00_47: - subq $-32*4,%rbp - rorl $14,%r13d - movdqa %xmm1,%xmm4 - movl %r14d,%eax - movl %r9d,%r12d - movdqa %xmm3,%xmm7 - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,224,4 - andl %r8d,%r12d - xorl %r8d,%r13d -.byte 102,15,58,15,250,4 - addl 0(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - addl %r12d,%r11d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm0 - rorl $2,%r14d - addl %r11d,%edx - psrld $7,%xmm6 - addl %edi,%r11d - movl %edx,%r13d - pshufd $250,%xmm3,%xmm7 - addl %r11d,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%r11d - movl %r8d,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 4(%rsp),%r10d - movl %r11d,%edi - pxor %xmm6,%xmm4 - xorl %r9d,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - addl %r12d,%r10d - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm0 - rorl $2,%r14d - addl %r10d,%ecx - psrlq $17,%xmm6 - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %ecx,%r13d - xorl %r8d,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - psrldq $8,%xmm7 - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - paddd %xmm7,%xmm0 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - pshufd $80,%xmm0,%xmm7 - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - movdqa %xmm7,%xmm6 - addl %edi,%r9d - movl %ebx,%r13d - psrld $10,%xmm7 - addl %r9d,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - psrlq $2,%xmm6 - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - pxor %xmm6,%xmm7 - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %r10d,%edi - addl %r12d,%r8d - movdqa 0(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - paddd %xmm7,%xmm0 - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - paddd %xmm0,%xmm6 - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,0(%rsp) - rorl $14,%r13d - movdqa %xmm2,%xmm4 - movl %r14d,%r8d - movl %ebx,%r12d - movdqa %xmm0,%xmm7 - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,225,4 - andl %eax,%r12d - xorl %eax,%r13d -.byte 102,15,58,15,251,4 - addl 16(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - addl %r12d,%edx - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm1 - rorl $2,%r14d - addl %edx,%r11d - psrld $7,%xmm6 - addl %edi,%edx - movl %r11d,%r13d - pshufd $250,%xmm0,%xmm7 - addl %edx,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%edx - movl %eax,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 20(%rsp),%ecx - movl %edx,%edi - pxor %xmm6,%xmm4 - xorl %ebx,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - addl %r12d,%ecx - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm1 - rorl $2,%r14d - addl %ecx,%r10d - psrlq $17,%xmm6 - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r10d,%r13d - xorl %eax,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - psrldq $8,%xmm7 - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - paddd %xmm7,%xmm1 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - pshufd $80,%xmm1,%xmm7 - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - movdqa %xmm7,%xmm6 - addl %edi,%ebx - movl %r9d,%r13d - psrld $10,%xmm7 - addl %ebx,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - psrlq $2,%xmm6 - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - pxor %xmm6,%xmm7 - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %ecx,%edi - addl %r12d,%eax - movdqa 32(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - paddd %xmm7,%xmm1 - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - paddd %xmm1,%xmm6 - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,16(%rsp) - rorl $14,%r13d - movdqa %xmm3,%xmm4 - movl %r14d,%eax - movl %r9d,%r12d - movdqa %xmm1,%xmm7 - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,226,4 - andl %r8d,%r12d - xorl %r8d,%r13d -.byte 102,15,58,15,248,4 - addl 32(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - addl %r12d,%r11d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm2 - rorl $2,%r14d - addl %r11d,%edx - psrld $7,%xmm6 - addl %edi,%r11d - movl %edx,%r13d - pshufd $250,%xmm1,%xmm7 - addl %r11d,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%r11d - movl %r8d,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 36(%rsp),%r10d - movl %r11d,%edi - pxor %xmm6,%xmm4 - xorl %r9d,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - addl %r12d,%r10d - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm2 - rorl $2,%r14d - addl %r10d,%ecx - psrlq $17,%xmm6 - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %ecx,%r13d - xorl %r8d,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - psrldq $8,%xmm7 - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - paddd %xmm7,%xmm2 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - pshufd $80,%xmm2,%xmm7 - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - movdqa %xmm7,%xmm6 - addl %edi,%r9d - movl %ebx,%r13d - psrld $10,%xmm7 - addl %r9d,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - psrlq $2,%xmm6 - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - pxor %xmm6,%xmm7 - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %r10d,%edi - addl %r12d,%r8d - movdqa 64(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - paddd %xmm7,%xmm2 - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - paddd %xmm2,%xmm6 - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,32(%rsp) - rorl $14,%r13d - movdqa %xmm0,%xmm4 - movl %r14d,%r8d - movl %ebx,%r12d - movdqa %xmm2,%xmm7 - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,227,4 - andl %eax,%r12d - xorl %eax,%r13d -.byte 102,15,58,15,249,4 - addl 48(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - addl %r12d,%edx - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm3 - rorl $2,%r14d - addl %edx,%r11d - psrld $7,%xmm6 - addl %edi,%edx - movl %r11d,%r13d - pshufd $250,%xmm2,%xmm7 - addl %edx,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%edx - movl %eax,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 52(%rsp),%ecx - movl %edx,%edi - pxor %xmm6,%xmm4 - xorl %ebx,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - addl %r12d,%ecx - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm3 - rorl $2,%r14d - addl %ecx,%r10d - psrlq $17,%xmm6 - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r10d,%r13d - xorl %eax,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - psrldq $8,%xmm7 - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - paddd %xmm7,%xmm3 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - pshufd $80,%xmm3,%xmm7 - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - movdqa %xmm7,%xmm6 - addl %edi,%ebx - movl %r9d,%r13d - psrld $10,%xmm7 - addl %ebx,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - psrlq $2,%xmm6 - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - pxor %xmm6,%xmm7 - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %ecx,%edi - addl %r12d,%eax - movdqa 96(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - paddd %xmm7,%xmm3 - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - paddd %xmm3,%xmm6 - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,48(%rsp) - cmpb $0,131(%rbp) - jne .Lssse3_00_47 - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 0(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - xorl %ebx,%r15d - addl %r12d,%r11d - rorl $6,%r13d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - rorl $2,%r14d - addl %r11d,%edx - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 4(%rsp),%r10d - movl %r11d,%edi - xorl %r9d,%r12d - rorl $11,%r14d - xorl %eax,%edi - addl %r12d,%r10d - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - rorl $2,%r14d - addl %r10d,%ecx - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - xorl %r10d,%edi - addl %r12d,%r8d - rorl $6,%r13d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 16(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - xorl %r9d,%r15d - addl %r12d,%edx - rorl $6,%r13d - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - rorl $2,%r14d - addl %edx,%r11d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 20(%rsp),%ecx - movl %edx,%edi - xorl %ebx,%r12d - rorl $11,%r14d - xorl %r8d,%edi - addl %r12d,%ecx - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - rorl $2,%r14d - addl %ecx,%r10d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r12d rorl $5,%r13d + addl %ebx,%r12d xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - xorl %eax,%r12d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - andl %r15d,%edi + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - xorl %ecx,%edi - addl %r12d,%eax - rorl $6,%r13d - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 32(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - xorl %ebx,%r15d - addl %r12d,%r11d - rorl $6,%r13d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - rorl $2,%r14d - addl %r11d,%edx - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 36(%rsp),%r10d - movl %r11d,%edi - xorl %r9d,%r12d - rorl $11,%r14d - xorl %eax,%edi - addl %r12d,%r10d + addl %r15d,%r12d + movl %edx,%r15d + rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d + andl %ecx,%ebx + andl %r8d,%r15d + rorl $2,%r14d - addl %r10d,%ecx - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d + addl %r13d,%r12d + addl %r15d,%ebx + addl %r12d,%r9d - rorl $6,%r13d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - xorl %r10d,%edi - addl %r12d,%r8d - rorl $6,%r13d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 48(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - xorl %r9d,%r15d - addl %r12d,%edx - rorl $6,%r13d - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - rorl $2,%r14d - addl %edx,%r11d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 52(%rsp),%ecx - movl %edx,%edi - xorl %ebx,%r12d - rorl $11,%r14d - xorl %r8d,%edi - addl %r12d,%ecx - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - rorl $2,%r14d - addl %ecx,%r10d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d addl %r12d,%ebx - rorl $6,%r13d - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - addl %edi,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d movl %r9d,%r13d - addl %ebx,%r14d + addl %r14d,%r12d + movl %ebx,%r14d rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d + movl %r10d,%r15d + movl %r12d,60(%rsp) + rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r12d + xorl %r11d,%r15d + rorl $5,%r13d + addl %eax,%r12d xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - movl %ebx,%edi - xorl %r11d,%r12d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + rorl $11,%r14d - xorl %ecx,%edi - addl %r12d,%eax - rorl $6,%r13d - andl %edi,%r15d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + rorl $2,%r14d - addl %eax,%r8d + addl %r13d,%r12d addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb .Lrounds_16_xx + movq 64+0(%rsp),%rdi - movl %r14d,%eax + leaq 64(%rsi),%rsi addl 0(%rdi),%eax - leaq 64(%rsi),%rsi addl 4(%rdi),%ebx addl 8(%rdi),%ecx addl 12(%rdi),%edx @@ -2886,13 +1792,9 @@ sha256_block_data_order_ssse3: movl %r9d,20(%rdi) movl %r10d,24(%rdi) movl %r11d,28(%rdi) - jb .Lloop_ssse3 + jb .Lloop movq 64+24(%rsp),%rsi - movaps 64+32(%rsp),%xmm6 - movaps 64+48(%rsp),%xmm7 - movaps 64+64(%rsp),%xmm8 - movaps 64+80(%rsp),%xmm9 movq (%rsi),%r15 movq 8(%rsi),%r14 movq 16(%rsi),%r13 @@ -2900,11 +1802,30 @@ sha256_block_data_order_ssse3: movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp -.Lepilogue_ssse3: +.Lepilogue: movq 8(%rsp),%rdi movq 16(%rsp),%rsi .byte 0xf3,0xc3 -.LSEH_end_sha256_block_data_order_ssse3: +.LSEH_end_sha256_block_data_order: +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .def se_handler; .scl 3; .type 32; .endef .p2align 4 @@ -2923,21 +1844,16 @@ se_handler: movq 120(%r8),%rax movq 248(%r8),%rbx - movq 8(%r9),%rsi - movq 56(%r9),%r11 - - movl 0(%r11),%r10d - leaq (%rsi,%r10,1),%r10 + leaq .Lprologue(%rip),%r10 cmpq %r10,%rbx jb .Lin_prologue movq 152(%r8),%rax - movl 4(%r11),%r10d - leaq (%rsi,%r10,1),%r10 + leaq .Lepilogue(%rip),%r10 cmpq %r10,%rbx jae .Lin_prologue - movq %rax,%rsi + movq 64+24(%rax),%rax leaq 48(%rax),%rax @@ -2954,15 +1870,6 @@ se_handler: movq %r14,232(%r8) movq %r15,240(%r8) - leaq .Lepilogue(%rip),%r10 - cmpq %r10,%rbx - jb .Lin_prologue - - leaq 64+32(%rsi),%rsi - leaq 512(%r8),%rdi - movl $8,%ecx -.long 0xa548f3fc - .Lin_prologue: movq 8(%rax),%rdi movq 16(%rax),%rsi @@ -2973,7 +1880,7 @@ se_handler: movq 40(%r9),%rdi movq %r8,%rsi movl $154,%ecx -.long 0xa548f3fc +.long 0xa548f3fc movq %r9,%rsi xorq %rcx,%rcx @@ -3008,18 +1915,10 @@ se_handler: .rva .LSEH_begin_sha256_block_data_order .rva .LSEH_end_sha256_block_data_order .rva .LSEH_info_sha256_block_data_order -.rva .LSEH_begin_sha256_block_data_order_ssse3 -.rva .LSEH_end_sha256_block_data_order_ssse3 -.rva .LSEH_info_sha256_block_data_order_ssse3 + .section .xdata .p2align 3 .LSEH_info_sha256_block_data_order: .byte 9,0,0,0 .rva se_handler -.rva .Lprologue,.Lepilogue -.LSEH_info_sha256_block_data_order_ssse3: -.byte 9,0,0,0 -.rva se_handler -.rva .Lprologue_ssse3,.Lepilogue_ssse3 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/aes-ssse3-x86.s b/lib/accelerated/x86/elf/aes-ssse3-x86.s index 3aa221267a..92bdeeadb3 100644 --- a/lib/accelerated/x86/elf/aes-ssse3-x86.s +++ b/lib/accelerated/x86/elf/aes-ssse3-x86.s @@ -85,33 +85,33 @@ _vpaes_encrypt_core: movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 - pand %xmm6,%xmm0 movdqu (%edx),%xmm5 + psrld $4,%xmm1 + pand %xmm6,%xmm0 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 +.byte 102,15,56,0,193 pxor %xmm5,%xmm2 - psrld $4,%xmm1 + pxor %xmm2,%xmm0 addl $16,%edx -.byte 102,15,56,0,193 leal 192(%ebp),%ebx - pxor %xmm2,%xmm0 jmp .L000enc_entry .align 16 .L001enc_loop: movdqa 32(%ebp),%xmm4 - movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,226 -.byte 102,15,56,0,195 pxor %xmm5,%xmm4 - movdqa 64(%ebp),%xmm5 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa -64(%ebx,%ecx,1),%xmm1 + movdqa 64(%ebp),%xmm5 .byte 102,15,56,0,234 + movdqa -64(%ebx,%ecx,1),%xmm1 movdqa 80(%ebp),%xmm2 - movdqa (%ebx,%ecx,1),%xmm4 .byte 102,15,56,0,211 - movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 + movdqa %xmm0,%xmm3 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 @@ -120,28 +120,28 @@ _vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx - subl $1,%eax pxor %xmm3,%xmm0 + subl $1,%eax .L000enc_entry: movdqa %xmm6,%xmm1 - movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm5 .byte 102,15,56,0,232 - movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm7,%xmm4 pxor %xmm5,%xmm3 + movdqa %xmm7,%xmm4 .byte 102,15,56,0,224 - movdqa %xmm7,%xmm2 pxor %xmm5,%xmm4 + movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 -.byte 102,15,56,0,220 + movdqa %xmm7,%xmm3 movdqu (%edx),%xmm5 +.byte 102,15,56,0,220 pxor %xmm1,%xmm3 jnz .L001enc_loop movdqa 96(%ebp),%xmm4 @@ -157,8 +157,8 @@ _vpaes_encrypt_core: .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: - leal 608(%ebp),%ebx movl 240(%edx),%eax + leal 608(%ebp),%ebx movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 @@ -181,56 +181,56 @@ _vpaes_decrypt_core: .align 16 .L003dec_loop: movdqa -32(%ebx),%xmm4 - movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa -16(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,56,0,197 movdqa (%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 + subl $1,%eax +.byte 102,15,56,0,197 movdqa 32(%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 48(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 movdqa 64(%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 80(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - addl $16,%edx .byte 102,15,58,15,237,12 - pxor %xmm1,%xmm0 - subl $1,%eax .L002dec_entry: movdqa %xmm6,%xmm1 - movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 - pand %xmm6,%xmm0 psrld $4,%xmm1 + pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm2 .byte 102,15,56,0,208 - movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm7,%xmm4 pxor %xmm2,%xmm3 + movdqa %xmm7,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,220 - movdqu (%edx),%xmm0 pxor %xmm1,%xmm3 + movdqu (%edx),%xmm0 jnz .L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 @@ -339,12 +339,12 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm1 + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 pshufd $254,%xmm7,%xmm0 - pxor %xmm1,%xmm6 - pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 ret .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear @@ -613,8 +613,6 @@ vpaes_cbc_encrypt: movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx - subl $16,%eax - jc .L020cbc_abort leal -56(%esp),%ebx movl 36(%esp),%ebp andl $-16,%ebx @@ -624,17 +622,18 @@ vpaes_cbc_encrypt: subl %esi,%edi movl %ebx,48(%esp) movl %edi,(%esp) + subl $16,%eax movl %edx,4(%esp) movl %ebp,8(%esp) movl %eax,%edi - leal .L_vpaes_consts+0x30-.L021pic_point,%ebp + leal .L_vpaes_consts+0x30-.L020pic_point,%ebp call _vpaes_preheat -.L021pic_point: +.L020pic_point: cmpl $0,%ecx - je .L022cbc_dec_loop - jmp .L023cbc_enc_loop + je .L021cbc_dec_loop + jmp .L022cbc_enc_loop .align 16 -.L023cbc_enc_loop: +.L022cbc_enc_loop: movdqu (%esi),%xmm0 pxor %xmm1,%xmm0 call _vpaes_encrypt_core @@ -644,10 +643,10 @@ vpaes_cbc_encrypt: movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi - jnc .L023cbc_enc_loop - jmp .L024cbc_done + jnc .L022cbc_enc_loop + jmp .L023cbc_done .align 16 -.L022cbc_dec_loop: +.L021cbc_dec_loop: movdqu (%esi),%xmm0 movdqa %xmm1,16(%esp) movdqa %xmm0,32(%esp) @@ -659,12 +658,11 @@ vpaes_cbc_encrypt: movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi - jnc .L022cbc_dec_loop -.L024cbc_done: + jnc .L021cbc_dec_loop +.L023cbc_done: movl 8(%esp),%ebx movl 48(%esp),%esp movdqu %xmm1,(%ebx) -.L020cbc_abort: popl %edi popl %esi popl %ebx diff --git a/lib/accelerated/x86/elf/aes-ssse3-x86_64.s b/lib/accelerated/x86/elf/aes-ssse3-x86_64.s index d394571c71..78dd07e700 100644 --- a/lib/accelerated/x86/elf/aes-ssse3-x86_64.s +++ b/lib/accelerated/x86/elf/aes-ssse3-x86_64.s @@ -43,8 +43,8 @@ _vpaes_encrypt_core: movdqa .Lk_ipt+16(%rip),%xmm0 .byte 102,15,56,0,193 pxor %xmm5,%xmm2 - addq $16,%r9 pxor %xmm2,%xmm0 + addq $16,%r9 leaq .Lk_mc_backward(%rip),%r10 jmp .Lenc_entry @@ -52,19 +52,19 @@ _vpaes_encrypt_core: .Lenc_loop: movdqa %xmm13,%xmm4 - movdqa %xmm12,%xmm0 .byte 102,15,56,0,226 -.byte 102,15,56,0,195 pxor %xmm5,%xmm4 - movdqa %xmm15,%xmm5 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm15,%xmm5 .byte 102,15,56,0,234 - movdqa (%r11,%r10,1),%xmm4 + movdqa -64(%r11,%r10,1),%xmm1 movdqa %xmm14,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 .byte 102,15,56,0,193 addq $16,%r9 pxor %xmm2,%xmm0 @@ -73,30 +73,30 @@ _vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andq $48,%r11 - subq $1,%rax pxor %xmm3,%xmm0 + subq $1,%rax .Lenc_entry: movdqa %xmm9,%xmm1 - movdqa %xmm11,%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 .byte 102,15,56,0,232 - movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm10,%xmm4 pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 - movdqa %xmm10,%xmm2 pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 -.byte 102,15,56,0,220 + movdqa %xmm10,%xmm3 movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 pxor %xmm1,%xmm3 jnz .Lenc_loop @@ -149,61 +149,62 @@ _vpaes_decrypt_core: movdqa -32(%r10),%xmm4 - movdqa -16(%r10),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 0(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 16(%r10),%xmm1 + addq $16,%r9 -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 32(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 48(%r10),%xmm1 + subq $1,%rax -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 64(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 80(%r10),%xmm1 -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - addq $16,%r9 + .byte 102,15,58,15,237,12 - pxor %xmm1,%xmm0 - subq $1,%rax .Ldec_entry: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 - movdqa %xmm11,%xmm2 psrld $4,%xmm1 pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 .byte 102,15,56,0,208 - movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm10,%xmm4 pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,220 - movdqu (%r9),%xmm0 pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 jnz .Ldec_loop @@ -211,7 +212,7 @@ _vpaes_decrypt_core: .byte 102,15,56,0,226 pxor %xmm0,%xmm4 movdqa 112(%r10),%xmm0 - movdqa -352(%r11),%xmm2 + movdqa .Lk_sr-.Lk_dsbd(%r11),%xmm2 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 .byte 102,15,56,0,194 @@ -231,7 +232,7 @@ _vpaes_schedule_core: - call _vpaes_preheat + call _vpaes_preheat movdqa .Lk_rcon(%rip),%xmm8 movdqu (%rdi),%xmm0 @@ -277,7 +278,7 @@ _vpaes_schedule_core: call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle jmp .Loop_schedule_128 @@ -298,7 +299,7 @@ _vpaes_schedule_core: .align 16 .Lschedule_192: movdqu 8(%rdi),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 @@ -307,13 +308,13 @@ _vpaes_schedule_core: .Loop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_192_smear - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_192_smear jmp .Loop_schedule_192 @@ -330,18 +331,18 @@ _vpaes_schedule_core: .align 16 .Lschedule_256: movdqu 16(%rdi),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movl $7,%esi .Loop_schedule_256: - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle movdqa %xmm0,%xmm6 call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle pshufd $255,%xmm0,%xmm0 @@ -379,7 +380,7 @@ _vpaes_schedule_core: .Lschedule_mangle_last_dec: addq $-16,%rdx pxor .Lk_s63(%rip),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movdqu %xmm0,(%rdx) @@ -411,12 +412,12 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm1 + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 pshufd $254,%xmm7,%xmm0 - pxor %xmm1,%xmm6 - pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear @@ -679,10 +680,9 @@ vpaes_decrypt: .align 16 vpaes_cbc_encrypt: xchgq %rcx,%rdx - subq $16,%rcx - jc .Lcbc_abort movdqu (%r8),%xmm6 subq %rdi,%rsi + subq $16,%rcx call _vpaes_preheat cmpl $0,%r9d je .Lcbc_dec_loop @@ -711,7 +711,6 @@ vpaes_cbc_encrypt: jnc .Lcbc_dec_loop .Lcbc_done: movdqu %xmm6,(%r8) -.Lcbc_abort: .byte 0xf3,0xc3 .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt diff --git a/lib/accelerated/x86/elf/aesni-x86_64.s b/lib/accelerated/x86/elf/aesni-x86_64.s index dbfe999d71..59b37d83ba 100644 --- a/lib/accelerated/x86/elf/aesni-x86_64.s +++ b/lib/accelerated/x86/elf/aesni-x86_64.s @@ -53,7 +53,7 @@ aesni_encrypt: decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx - jnz .Loop_enc1_1 + jnz .Loop_enc1_1 .byte 102,15,56,221,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 @@ -74,7 +74,7 @@ aesni_decrypt: decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx - jnz .Loop_dec1_2 + jnz .Loop_dec1_2 .byte 102,15,56,223,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 @@ -583,7 +583,7 @@ aesni_ecb_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_3 + jnz .Loop_enc1_3 .byte 102,15,56,221,209 movups %xmm2,(%rsi) jmp .Lecb_ret @@ -728,7 +728,7 @@ aesni_ecb_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_4 + jnz .Loop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) jmp .Lecb_ret @@ -857,7 +857,7 @@ aesni_ccm64_decrypt_blocks: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_5 + jnz .Loop_enc1_5 .byte 102,15,56,221,209 movups (%rdi),%xmm8 paddq %xmm6,%xmm9 @@ -916,7 +916,7 @@ aesni_ccm64_decrypt_blocks: decl %eax movups (%r11),%xmm1 leaq 16(%r11),%r11 - jnz .Loop_enc1_6 + jnz .Loop_enc1_6 .byte 102,15,56,221,217 movups %xmm3,(%r9) .byte 0xf3,0xc3 @@ -925,412 +925,199 @@ aesni_ccm64_decrypt_blocks: .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: - leaq (%rsp),%rax - pushq %rbp - subq $128,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp - cmpq $1,%rdx je .Lctr32_one_shortcut - movdqu (%r8),%xmm2 - movdqu (%rcx),%xmm0 - movl 12(%r8),%r8d - pxor %xmm0,%xmm2 - movl 12(%rcx),%r11d - movdqa %xmm2,0(%rsp) - bswapl %r8d - movdqa %xmm2,%xmm3 - movdqa %xmm2,%xmm4 - movdqa %xmm2,%xmm5 - movdqa %xmm2,64(%rsp) - movdqa %xmm2,80(%rsp) - movdqa %xmm2,96(%rsp) - movdqa %xmm2,112(%rsp) + movdqu (%r8),%xmm14 + movdqa .Lbswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 movl 240(%rcx),%eax - - leaq 1(%r8),%r9 - leaq 2(%r8),%r10 - bswapl %r9d - bswapl %r10d - xorl %r11d,%r9d - xorl %r11d,%r10d -.byte 102,65,15,58,34,217,3 - leaq 3(%r8),%r9 - movdqa %xmm3,16(%rsp) -.byte 102,65,15,58,34,226,3 - bswapl %r9d - leaq 4(%r8),%r10 - movdqa %xmm4,32(%rsp) - xorl %r11d,%r9d bswapl %r10d -.byte 102,65,15,58,34,233,3 - xorl %r11d,%r10d - movdqa %xmm5,48(%rsp) - leaq 5(%r8),%r9 - movl %r10d,64+12(%rsp) - bswapl %r9d - leaq 6(%r8),%r10 - xorl %r11d,%r9d - bswapl %r10d - movl %r9d,80+12(%rsp) - xorl %r11d,%r10d - leaq 7(%r8),%r9 - movl %r10d,96+12(%rsp) - bswapl %r9d - xorl %r11d,%r9d - movl %r9d,112+12(%rsp) + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb .Lctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp .Lctr32_loop6 - movups 16(%rcx),%xmm1 +.align 16 +.Lctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 - movdqa 64(%rsp),%xmm6 - movdqa 80(%rsp),%xmm7 - cmpq $8,%rdx - jb .Lctr32_tail - leaq 128(%rcx),%rcx - subq $8,%rdx - jmp .Lctr32_loop8 -.align 32 -.Lctr32_loop8: - addl $8,%r8d - movdqa 96(%rsp),%xmm8 -.byte 102,15,56,220,209 - movl %r8d,%r9d - movdqa 112(%rsp),%xmm9 -.byte 102,15,56,220,217 - bswapl %r9d - movups 32-128(%rcx),%xmm0 -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,0+12(%rsp) - leaq 1(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 48-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,16+12(%rsp) - leaq 2(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 64-128(%rcx),%xmm0 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - bswapl %r9d -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,32+12(%rsp) - leaq 3(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 80-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,48+12(%rsp) - leaq 4(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 96-128(%rcx),%xmm0 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - bswapl %r9d -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,64+12(%rsp) - leaq 5(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 112-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,80+12(%rsp) - leaq 6(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 128-128(%rcx),%xmm0 + pxor %xmm0,%xmm3 .byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 .byte 102,15,56,220,217 - bswapl %r9d + movdqa .Lincrement32(%rip),%xmm13 + pxor %xmm0,%xmm5 .byte 102,15,56,220,225 - xorl %r11d,%r9d + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 .byte 102,15,56,220,233 - movl %r9d,96+12(%rsp) - leaq 7(%r8),%r9 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 144-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,112+12(%rsp) -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 - movdqu 0(%rdi),%xmm10 -.byte 102,68,15,56,220,200 - movups 160-128(%rcx),%xmm0 - - cmpl $11,%eax - jb .Lctr32_enc_done - + jmp .Lctr32_enc_loop6_enter +.align 16 +.Lctr32_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 + decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 176-128(%rcx),%xmm1 - +.Lctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 + leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 192-128(%rcx),%xmm0 - je .Lctr32_enc_done - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 208-128(%rcx),%xmm1 + movups (%rcx),%xmm0 + jnz .Lctr32_enc_loop6 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 224-128(%rcx),%xmm0 - -.Lctr32_enc_done: - movdqu 16(%rdi),%xmm11 - pxor %xmm0,%xmm10 - movdqu 32(%rdi),%xmm12 - pxor %xmm0,%xmm11 - movdqu 48(%rdi),%xmm13 - pxor %xmm0,%xmm12 - movdqu 64(%rdi),%xmm14 - pxor %xmm0,%xmm13 - movdqu 80(%rdi),%xmm15 - pxor %xmm0,%xmm14 .byte 102,15,56,220,209 - pxor %xmm0,%xmm15 + paddd %xmm13,%xmm12 .byte 102,15,56,220,217 + paddd -24(%rsp),%xmm13 .byte 102,15,56,220,225 + movdqa %xmm12,-40(%rsp) .byte 102,15,56,220,233 + movdqa %xmm13,-24(%rsp) .byte 102,15,56,220,241 +.byte 102,69,15,56,0,231 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movdqu 96(%rdi),%xmm1 - -.byte 102,65,15,56,221,210 - pxor %xmm0,%xmm1 - movdqu 112(%rdi),%xmm10 - leaq 128(%rdi),%rdi -.byte 102,65,15,56,221,219 - pxor %xmm0,%xmm10 - movdqa 0(%rsp),%xmm11 -.byte 102,65,15,56,221,228 - movdqa 16(%rsp),%xmm12 -.byte 102,65,15,56,221,237 - movdqa 32(%rsp),%xmm13 -.byte 102,65,15,56,221,246 - movdqa 48(%rsp),%xmm14 -.byte 102,65,15,56,221,255 - movdqa 64(%rsp),%xmm15 -.byte 102,68,15,56,221,193 - movdqa 80(%rsp),%xmm0 -.byte 102,69,15,56,221,202 - movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,0,239 - movups %xmm2,(%rsi) - movdqa %xmm11,%xmm2 - movups %xmm3,16(%rsi) - movdqa %xmm12,%xmm3 - movups %xmm4,32(%rsi) - movdqa %xmm13,%xmm4 - movups %xmm5,48(%rsi) - movdqa %xmm14,%xmm5 - movups %xmm6,64(%rsi) - movdqa %xmm15,%xmm6 - movups %xmm7,80(%rsi) - movdqa %xmm0,%xmm7 - movups %xmm8,96(%rsi) - movups %xmm9,112(%rsi) - leaq 128(%rsi),%rsi +.byte 102,15,56,221,208 + movups (%rdi),%xmm8 +.byte 102,15,56,221,216 + movups 16(%rdi),%xmm9 +.byte 102,15,56,221,224 + movups 32(%rdi),%xmm10 +.byte 102,15,56,221,232 + movups 48(%rdi),%xmm11 +.byte 102,15,56,221,240 + movups 64(%rdi),%xmm1 +.byte 102,15,56,221,248 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi - subq $8,%rdx - jnc .Lctr32_loop8 + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc .Lctr32_loop6 - addq $8,%rdx + addq $6,%rdx jz .Lctr32_done - leaq -128(%rcx),%rcx + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax .Lctr32_tail: - leaq 16(%rcx),%rcx - cmpq $4,%rdx - jb .Lctr32_loop3 - je .Lctr32_loop4 - - movdqa 96(%rsp),%xmm8 - pxor %xmm9,%xmm9 - - movups 16(%rcx),%xmm0 -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 - shrl $1,%eax -.byte 102,15,56,220,225 - decl %eax -.byte 102,15,56,220,233 - movups (%rdi),%xmm10 -.byte 102,15,56,220,241 - movups 16(%rdi),%xmm11 -.byte 102,15,56,220,249 - movups 32(%rdi),%xmm12 -.byte 102,68,15,56,220,193 - movups 16(%rcx),%xmm1 - - call .Lenc_loop8_enter - - movdqu 48(%rdi),%xmm13 - pxor %xmm10,%xmm2 - movdqu 64(%rdi),%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm10,%xmm6 - movdqu %xmm5,48(%rsi) - movdqu %xmm6,64(%rsi) - cmpq $6,%rdx - jb .Lctr32_done - - movups 80(%rdi),%xmm11 - xorps %xmm11,%xmm7 - movups %xmm7,80(%rsi) - je .Lctr32_done - - movups 96(%rdi),%xmm12 - xorps %xmm12,%xmm8 - movups %xmm8,96(%rsi) - jmp .Lctr32_done + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb .Lctr32_one -.align 32 -.Lctr32_loop4: -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - movups (%rcx),%xmm1 - decl %eax - jnz .Lctr32_loop4 -.byte 102,15,56,221,209 - movups (%rdi),%xmm10 -.byte 102,15,56,221,217 - movups 16(%rdi),%xmm11 -.byte 102,15,56,221,225 - movups 32(%rdi),%xmm12 -.byte 102,15,56,221,233 - movups 48(%rdi),%xmm13 + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je .Lctr32_two - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - xorps %xmm11,%xmm3 - movups %xmm3,16(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm4,32(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm5,48(%rsi) - jmp .Lctr32_done + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb .Lctr32_three -.align 32 -.Lctr32_loop3: -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 - movups (%rcx),%xmm1 - decl %eax - jnz .Lctr32_loop3 -.byte 102,15,56,221,209 -.byte 102,15,56,221,217 -.byte 102,15,56,221,225 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je .Lctr32_four - movups (%rdi),%xmm10 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - cmpq $2,%rdx - jb .Lctr32_done + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 - movups 16(%rdi),%xmm11 - xorps %xmm11,%xmm3 - movups %xmm3,16(%rsi) - je .Lctr32_done + call _aesni_encrypt6 - movups 32(%rdi),%xmm12 - xorps %xmm12,%xmm4 - movups %xmm4,32(%rsi) + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) jmp .Lctr32_done .align 16 .Lctr32_one_shortcut: movups (%r8),%xmm2 - movups (%rdi),%xmm10 + movups (%rdi),%xmm8 movl 240(%rcx),%eax +.Lctr32_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -1340,28 +1127,53 @@ aesni_ctr32_encrypt_blocks: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_7 + jnz .Loop_enc1_7 .byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) jmp .Lctr32_done .align 16 +.Lctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + .Lctr32_done: - leaq (%rbp),%rsp - popq %rbp -.Lctr32_epilogue: .byte 0xf3,0xc3 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks .globl aesni_xts_encrypt .type aesni_xts_encrypt,@function .align 16 aesni_xts_encrypt: - leaq (%rsp),%rax - pushq %rbp - subq $112,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp + leaq -104(%rsp),%rsp movups (%r9),%xmm15 movl 240(%r8),%eax movl 240(%rcx),%r10d @@ -1374,268 +1186,230 @@ aesni_xts_encrypt: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz .Loop_enc1_8 + jnz .Loop_enc1_8 .byte 102,68,15,56,221,249 - movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax - shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx - movups 16(%rcx,%r10,1),%xmm1 - movl %eax,%r10d - movdqa .Lxts_magic(%rip),%xmm8 - pshufd $95,%xmm15,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm10 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm11 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm12 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - subq $96,%rdx jc .Lxts_enc_short shrl $1,%eax - subl $3,%eax - movups 16(%r11),%xmm1 + subl $1,%eax movl %eax,%r10d - leaq .Lxts_magic(%rip),%r8 jmp .Lxts_enc_grandloop -.align 32 +.align 16 .Lxts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,220,209 + pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,220,217 + pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,220,225 + pxor %xmm12,%xmm4 movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,220,233 - movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 - pxor %xmm9,%xmm10 -.byte 102,15,56,220,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,220,249 - movups 48(%r11),%xmm1 -.byte 102,15,56,220,208 - pxor %xmm9,%xmm12 + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 movdqa %xmm11,16(%rsp) -.byte 102,15,56,220,216 - pxor %xmm9,%xmm13 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 movdqa %xmm12,32(%rsp) -.byte 102,15,56,220,224 - pxor %xmm9,%xmm14 -.byte 102,15,56,220,232 - pxor %xmm9,%xmm8 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax movdqa %xmm14,64(%rsp) -.byte 102,15,56,220,240 - movdqa %xmm8,80(%rsp) -.byte 102,15,56,220,248 - movups 64(%r11),%xmm0 - leaq 64(%r11),%rcx - pshufd $95,%xmm15,%xmm9 - jmp .Lxts_enc_loop6 -.align 32 +.byte 102,15,56,220,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,220,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_enc_loop6_enter + +.align 16 .Lxts_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 + decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 +.Lxts_enc_loop6_enter: movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - .byte 102,15,56,220,208 .byte 102,15,56,220,216 + leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups (%rcx),%xmm0 - decl %eax jnz .Lxts_enc_loop6 - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 paddq %xmm15,%xmm15 - psrad $31,%xmm14 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 .byte 102,15,56,220,217 - pand %xmm8,%xmm14 - movups (%r11),%xmm10 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,225 + pxor %xmm9,%xmm15 .byte 102,15,56,220,233 - pxor %xmm14,%xmm15 .byte 102,15,56,220,241 - movaps %xmm10,%xmm11 .byte 102,15,56,220,249 movups 16(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 .byte 102,15,56,220,208 - pxor %xmm15,%xmm10 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,224 + pxor %xmm9,%xmm15 .byte 102,15,56,220,232 - pxor %xmm14,%xmm15 .byte 102,15,56,220,240 - movaps %xmm11,%xmm12 .byte 102,15,56,220,248 movups 32(%rcx),%xmm0 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 .byte 102,15,56,220,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,220,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,225 - movdqa %xmm13,48(%rsp) + pxor %xmm9,%xmm15 .byte 102,15,56,220,233 - pxor %xmm14,%xmm15 .byte 102,15,56,220,241 - movaps %xmm12,%xmm13 .byte 102,15,56,220,249 - movups 48(%rcx),%xmm1 - - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - pxor %xmm14,%xmm15 -.byte 102,15,56,220,240 - movaps %xmm13,%xmm14 -.byte 102,15,56,220,248 - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,220,217 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - pxor %xmm0,%xmm15 - movups (%r11),%xmm0 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups 16(%r11),%xmm1 +.byte 102,15,56,221,208 + pand %xmm8,%xmm9 +.byte 102,15,56,221,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,221,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 - pxor %xmm15,%xmm14 - psrad $31,%xmm9 -.byte 102,15,56,221,84,36,0 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 pand %xmm8,%xmm9 -.byte 102,15,56,221,92,36,16 -.byte 102,15,56,221,100,36,32 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 -.byte 102,15,56,221,108,36,48 -.byte 102,15,56,221,116,36,64 -.byte 102,15,56,221,124,36,80 - movl %r10d,%eax + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_enc_grandloop - leal 7(%rax,%rax,1),%eax + leal 3(%rax,%rax,1),%eax movq %r11,%rcx movl %eax,%r10d .Lxts_enc_short: - pxor %xmm0,%xmm10 addq $96,%rdx jz .Lxts_enc_done - pxor %xmm0,%xmm11 cmpq $32,%rdx jb .Lxts_enc_one - pxor %xmm0,%xmm12 je .Lxts_enc_two - pxor %xmm0,%xmm13 cmpq $64,%rdx jb .Lxts_enc_three - pxor %xmm0,%xmm14 je .Lxts_enc_four + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1676,7 +1450,7 @@ aesni_xts_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_9 + jnz .Loop_enc1_9 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1738,15 +1512,15 @@ aesni_xts_encrypt: call _aesni_encrypt4 - pxor %xmm10,%xmm2 - movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_enc_done @@ -1781,14 +1555,13 @@ aesni_xts_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_10 + jnz .Loop_enc1_10 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movups %xmm2,-16(%rsi) .Lxts_enc_ret: - leaq (%rbp),%rsp - popq %rbp + leaq 104(%rsp),%rsp .Lxts_enc_epilogue: .byte 0xf3,0xc3 .size aesni_xts_encrypt,.-aesni_xts_encrypt @@ -1796,11 +1569,7 @@ aesni_xts_encrypt: .type aesni_xts_decrypt,@function .align 16 aesni_xts_decrypt: - leaq (%rsp),%rax - pushq %rbp - subq $112,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp + leaq -104(%rsp),%rsp movups (%r9),%xmm15 movl 240(%r8),%eax movl 240(%rcx),%r10d @@ -1813,7 +1582,7 @@ aesni_xts_decrypt: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz .Loop_enc1_11 + jnz .Loop_enc1_11 .byte 102,68,15,56,221,249 xorl %eax,%eax testq $15,%rdx @@ -1821,266 +1590,228 @@ aesni_xts_decrypt: shlq $4,%rax subq %rax,%rdx - movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax - shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx - movups 16(%rcx,%r10,1),%xmm1 - movl %eax,%r10d - movdqa .Lxts_magic(%rip),%xmm8 - pshufd $95,%xmm15,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm10 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm11 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm12 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - subq $96,%rdx jc .Lxts_dec_short shrl $1,%eax - subl $3,%eax - movups 16(%r11),%xmm1 + subl $1,%eax movl %eax,%r10d - leaq .Lxts_magic(%rip),%r8 jmp .Lxts_dec_grandloop -.align 32 +.align 16 .Lxts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,222,209 + pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,222,217 + pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,222,225 + pxor %xmm12,%xmm4 movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,222,233 - movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 - pxor %xmm9,%xmm10 -.byte 102,15,56,222,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,222,249 - movups 48(%r11),%xmm1 -.byte 102,15,56,222,208 - pxor %xmm9,%xmm12 + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 movdqa %xmm11,16(%rsp) -.byte 102,15,56,222,216 - pxor %xmm9,%xmm13 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 movdqa %xmm12,32(%rsp) -.byte 102,15,56,222,224 - pxor %xmm9,%xmm14 -.byte 102,15,56,222,232 - pxor %xmm9,%xmm8 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax movdqa %xmm14,64(%rsp) -.byte 102,15,56,222,240 - movdqa %xmm8,80(%rsp) -.byte 102,15,56,222,248 - movups 64(%r11),%xmm0 - leaq 64(%r11),%rcx - pshufd $95,%xmm15,%xmm9 - jmp .Lxts_dec_loop6 -.align 32 +.byte 102,15,56,222,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,222,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_dec_loop6_enter + +.align 16 .Lxts_dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 + decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 +.Lxts_dec_loop6_enter: movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - .byte 102,15,56,222,208 .byte 102,15,56,222,216 + leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups (%rcx),%xmm0 - decl %eax jnz .Lxts_dec_loop6 - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 paddq %xmm15,%xmm15 - psrad $31,%xmm14 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 .byte 102,15,56,222,217 - pand %xmm8,%xmm14 - movups (%r11),%xmm10 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,225 + pxor %xmm9,%xmm15 .byte 102,15,56,222,233 - pxor %xmm14,%xmm15 .byte 102,15,56,222,241 - movaps %xmm10,%xmm11 .byte 102,15,56,222,249 movups 16(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 .byte 102,15,56,222,208 - pxor %xmm15,%xmm10 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,222,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,224 + pxor %xmm9,%xmm15 .byte 102,15,56,222,232 - pxor %xmm14,%xmm15 .byte 102,15,56,222,240 - movaps %xmm11,%xmm12 .byte 102,15,56,222,248 movups 32(%rcx),%xmm0 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 .byte 102,15,56,222,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,225 - movdqa %xmm13,48(%rsp) + pxor %xmm9,%xmm15 .byte 102,15,56,222,233 - pxor %xmm14,%xmm15 .byte 102,15,56,222,241 - movaps %xmm12,%xmm13 .byte 102,15,56,222,249 - movups 48(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,222,216 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 - pxor %xmm14,%xmm15 -.byte 102,15,56,222,240 - movaps %xmm13,%xmm14 -.byte 102,15,56,222,248 - - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - pxor %xmm0,%xmm15 - movups (%r11),%xmm0 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups 16(%r11),%xmm1 +.byte 102,15,56,223,208 + pand %xmm8,%xmm9 +.byte 102,15,56,223,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,223,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 - pxor %xmm15,%xmm14 - psrad $31,%xmm9 -.byte 102,15,56,223,84,36,0 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 pand %xmm8,%xmm9 -.byte 102,15,56,223,92,36,16 -.byte 102,15,56,223,100,36,32 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 -.byte 102,15,56,223,108,36,48 -.byte 102,15,56,223,116,36,64 -.byte 102,15,56,223,124,36,80 - movl %r10d,%eax + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_dec_grandloop - leal 7(%rax,%rax,1),%eax + leal 3(%rax,%rax,1),%eax movq %r11,%rcx movl %eax,%r10d .Lxts_dec_short: - pxor %xmm0,%xmm10 - pxor %xmm0,%xmm11 addq $96,%rdx jz .Lxts_dec_done - pxor %xmm0,%xmm12 cmpq $32,%rdx jb .Lxts_dec_one - pxor %xmm0,%xmm13 je .Lxts_dec_two - pxor %xmm0,%xmm14 cmpq $64,%rdx jb .Lxts_dec_three je .Lxts_dec_four + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -2130,7 +1861,7 @@ aesni_xts_decrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_12 + jnz .Loop_dec1_12 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -2173,7 +1904,7 @@ aesni_xts_decrypt: xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 - movdqa %xmm14,%xmm11 + movdqa %xmm15,%xmm11 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -2183,8 +1914,14 @@ aesni_xts_decrypt: .align 16 .Lxts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movups (%rdi),%xmm2 + pand %xmm8,%xmm9 movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 @@ -2195,16 +1932,16 @@ aesni_xts_decrypt: call _aesni_decrypt4 - pxor %xmm10,%xmm2 + xorps %xmm10,%xmm2 movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 + xorps %xmm11,%xmm3 movdqa %xmm15,%xmm11 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_dec_done @@ -2228,7 +1965,7 @@ aesni_xts_decrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_13 + jnz .Loop_dec1_13 .byte 102,15,56,223,209 xorps %xmm11,%xmm2 movups %xmm2,(%rsi) @@ -2258,14 +1995,13 @@ aesni_xts_decrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_14 + jnz .Loop_dec1_14 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) .Lxts_dec_ret: - leaq (%rbp),%rsp - popq %rbp + leaq 104(%rsp),%rsp .Lxts_dec_epilogue: .byte 0xf3,0xc3 .size aesni_xts_decrypt,.-aesni_xts_decrypt @@ -2302,7 +2038,7 @@ aesni_cbc_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_enc1_15 + jnz .Loop_enc1_15 .byte 102,15,56,221,209 movl %r10d,%eax movq %r11,%rcx @@ -2318,338 +2054,163 @@ aesni_cbc_encrypt: .Lcbc_enc_tail: movq %rdx,%rcx xchgq %rdi,%rsi -.long 0x9066A4F3 +.long 0x9066A4F3 movl $16,%ecx subq %rdx,%rcx xorl %eax,%eax -.long 0x9066AAF3 +.long 0x9066AAF3 leaq -16(%rdi),%rdi movl %r10d,%eax movq %rdi,%rsi movq %r11,%rcx xorq %rdx,%rdx - jmp .Lcbc_enc_loop + jmp .Lcbc_enc_loop .align 16 .Lcbc_decrypt: - leaq (%rsp),%rax - pushq %rbp - subq $16,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp - movups (%r8),%xmm10 + movups (%r8),%xmm9 movl %r10d,%eax - cmpq $80,%rdx - jbe .Lcbc_dec_tail - - movups (%rcx),%xmm0 - movdqu 0(%rdi),%xmm2 - movdqu 16(%rdi),%xmm3 - movdqa %xmm2,%xmm11 - movdqu 32(%rdi),%xmm4 - movdqa %xmm3,%xmm12 - movdqu 48(%rdi),%xmm5 - movdqa %xmm4,%xmm13 - movdqu 64(%rdi),%xmm6 - movdqa %xmm5,%xmm14 - movdqu 80(%rdi),%xmm7 - movdqa %xmm6,%xmm15 cmpq $112,%rdx - jbe .Lcbc_dec_six_or_seven - + jbe .Lcbc_dec_tail + shrl $1,%r10d subq $112,%rdx - leaq 112(%rcx),%rcx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) jmp .Lcbc_dec_loop8_enter .align 16 .Lcbc_dec_loop8: + movaps %xmm0,-24(%rsp) movups %xmm9,(%rsi) leaq 16(%rsi),%rsi .Lcbc_dec_loop8_enter: - movdqu 96(%rdi),%xmm8 - pxor %xmm0,%xmm2 - movdqu 112(%rdi),%xmm9 - pxor %xmm0,%xmm3 - movups 16-112(%rcx),%xmm1 - pxor %xmm0,%xmm4 - xorq %r11,%r11 - cmpq $112,%rdx - pxor %xmm0,%xmm5 - pxor %xmm0,%xmm6 - pxor %xmm0,%xmm7 - pxor %xmm0,%xmm8 + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 .byte 102,15,56,222,209 - pxor %xmm0,%xmm9 - movups 32-112(%rcx),%xmm0 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - setnc %r11b -.byte 102,68,15,56,222,193 - shlq $7,%r11 -.byte 102,68,15,56,222,201 - addq %rdi,%r11 - movups 48-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 64-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 80-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 96-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 112-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 128-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 144-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 160-112(%rcx),%xmm0 - cmpl $11,%eax - jb .Lcbc_dec_done -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 176-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 192-112(%rcx),%xmm0 - je .Lcbc_dec_done -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 208-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 224-112(%rcx),%xmm0 -.Lcbc_dec_done: -.byte 102,15,56,222,209 - pxor %xmm0,%xmm10 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 .byte 102,15,56,222,217 - pxor %xmm0,%xmm11 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 .byte 102,15,56,222,225 - pxor %xmm0,%xmm12 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 .byte 102,15,56,222,233 - pxor %xmm0,%xmm13 + pxor %xmm0,%xmm7 + decl %eax .byte 102,15,56,222,241 - pxor %xmm0,%xmm14 + pxor %xmm0,%xmm8 .byte 102,15,56,222,249 - pxor %xmm0,%xmm15 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movdqu 80(%rdi),%xmm1 - -.byte 102,65,15,56,223,210 - movdqu 96(%rdi),%xmm10 - pxor %xmm0,%xmm1 -.byte 102,65,15,56,223,219 - pxor %xmm0,%xmm10 - movdqu 112(%rdi),%xmm0 - leaq 128(%rdi),%rdi -.byte 102,65,15,56,223,228 - movdqu 0(%r11),%xmm11 -.byte 102,65,15,56,223,237 - movdqu 16(%r11),%xmm12 -.byte 102,65,15,56,223,246 - movdqu 32(%r11),%xmm13 -.byte 102,65,15,56,223,255 - movdqu 48(%r11),%xmm14 -.byte 102,68,15,56,223,193 - movdqu 64(%r11),%xmm15 -.byte 102,69,15,56,223,202 - movdqa %xmm0,%xmm10 - movdqu 80(%r11),%xmm1 - movups -112(%rcx),%xmm0 + movups 16(%rcx),%xmm1 + call .Ldec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 movups %xmm2,(%rsi) - movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) - movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) - movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) - movdqa %xmm14,%xmm5 + movl %r10d,%eax movups %xmm6,64(%rsi) - movdqa %xmm15,%xmm6 + movq %r11,%rcx movups %xmm7,80(%rsi) - movdqa %xmm1,%xmm7 + leaq 128(%rdi),%rdi movups %xmm8,96(%rsi) leaq 112(%rsi),%rsi - subq $128,%rdx ja .Lcbc_dec_loop8 movaps %xmm9,%xmm2 - leaq -112(%rcx),%rcx + movaps %xmm0,%xmm9 addq $112,%rdx jle .Lcbc_dec_tail_collected - movups %xmm9,(%rsi) + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax leaq 16(%rsi),%rsi - cmpq $80,%rdx - jbe .Lcbc_dec_tail - - movaps %xmm11,%xmm2 -.Lcbc_dec_six_or_seven: - cmpq $96,%rdx - ja .Lcbc_dec_seven - - movaps %xmm7,%xmm8 - call _aesni_decrypt6 - pxor %xmm10,%xmm2 - movaps %xmm8,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - pxor %xmm15,%xmm7 - movdqu %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - movdqa %xmm7,%xmm2 - jmp .Lcbc_dec_tail_collected - -.align 16 -.Lcbc_dec_seven: - movups 96(%rdi),%xmm8 - xorps %xmm9,%xmm9 - call _aesni_decrypt8 - movups 80(%rdi),%xmm9 - pxor %xmm10,%xmm2 - movups 96(%rdi),%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - pxor %xmm15,%xmm7 - movdqu %xmm6,64(%rsi) - pxor %xmm9,%xmm8 - movdqu %xmm7,80(%rsi) - leaq 96(%rsi),%rsi - movdqa %xmm8,%xmm2 - jmp .Lcbc_dec_tail_collected - .Lcbc_dec_tail: movups (%rdi),%xmm2 - subq $16,%rdx + movaps %xmm2,%xmm8 + cmpq $16,%rdx jbe .Lcbc_dec_one movups 16(%rdi),%xmm3 - movaps %xmm2,%xmm11 - subq $16,%rdx + movaps %xmm3,%xmm7 + cmpq $32,%rdx jbe .Lcbc_dec_two movups 32(%rdi),%xmm4 - movaps %xmm3,%xmm12 - subq $16,%rdx + movaps %xmm4,%xmm6 + cmpq $48,%rdx jbe .Lcbc_dec_three movups 48(%rdi),%xmm5 - movaps %xmm4,%xmm13 - subq $16,%rdx + cmpq $64,%rdx jbe .Lcbc_dec_four movups 64(%rdi),%xmm6 - movaps %xmm5,%xmm14 - movaps %xmm6,%xmm15 - xorps %xmm7,%xmm7 - call _aesni_decrypt6 - pxor %xmm10,%xmm2 - movaps %xmm15,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - movdqa %xmm6,%xmm2 - subq $16,%rdx - jmp .Lcbc_dec_tail_collected + cmpq $80,%rdx + jbe .Lcbc_dec_five + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe .Lcbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_one: - movaps %xmm2,%xmm11 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -2659,71 +2220,113 @@ aesni_cbc_encrypt: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_16 + jnz .Loop_dec1_16 .byte 102,15,56,223,209 - xorps %xmm10,%xmm2 - movaps %xmm11,%xmm10 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_two: - movaps %xmm3,%xmm12 xorps %xmm4,%xmm4 call _aesni_decrypt3 - pxor %xmm10,%xmm2 - movaps %xmm12,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - movdqa %xmm3,%xmm2 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 leaq 16(%rsi),%rsi + subq $32,%rdx jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_three: - movaps %xmm4,%xmm13 call _aesni_decrypt3 - pxor %xmm10,%xmm2 - movaps %xmm13,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - movdqa %xmm4,%xmm2 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 leaq 32(%rsi),%rsi + subq $48,%rdx jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_four: - movaps %xmm5,%xmm14 call _aesni_decrypt4 - pxor %xmm10,%xmm2 - movaps %xmm14,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - movdqa %xmm5,%xmm2 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 leaq 48(%rsi),%rsi + subq $64,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx jmp .Lcbc_dec_tail_collected - .align 16 .Lcbc_dec_tail_collected: - movups %xmm10,(%r8) andq $15,%rdx + movups %xmm9,(%r8) jnz .Lcbc_dec_tail_partial movups %xmm2,(%rsi) jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: - movaps %xmm2,(%rsp) + movaps %xmm2,-24(%rsp) movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx - leaq (%rsp),%rsi -.long 0x9066A4F3 + leaq -24(%rsp),%rsi +.long 0x9066A4F3 .Lcbc_dec_ret: - leaq (%rbp),%rsp - popq %rbp .Lcbc_ret: .byte 0xf3,0xc3 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt @@ -2731,7 +2334,7 @@ aesni_cbc_encrypt: .type aesni_set_decrypt_key,@function .align 16 aesni_set_decrypt_key: -.byte 0x48,0x83,0xEC,0x08 +.byte 0x48,0x83,0xEC,0x08 call __aesni_set_encrypt_key shll $4,%esi testl %eax,%eax @@ -2770,7 +2373,7 @@ aesni_set_decrypt_key: .align 16 aesni_set_encrypt_key: __aesni_set_encrypt_key: -.byte 0x48,0x83,0xEC,0x08 +.byte 0x48,0x83,0xEC,0x08 movq $-1,%rax testq %rdi,%rdi jz .Lenc_key_ret @@ -2966,8 +2569,6 @@ __aesni_set_encrypt_key: .long 1,0,0,0 .Lxts_magic: .long 0x87,0,1,0 -.Lincrement1: -.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 diff --git a/lib/accelerated/x86/elf/cpuid-x86_64.s b/lib/accelerated/x86/elf/cpuid-x86_64.s index 626c8f6b46..a6bb30ed7e 100644 --- a/lib/accelerated/x86/elf/cpuid-x86_64.s +++ b/lib/accelerated/x86/elf/cpuid-x86_64.s @@ -1,59 +1,69 @@ +#!/usr/bin/env perl # -# Copyright (C) 2011-2013 Free Software Foundation, Inc. -# Copyright (C) 2013 Nikos Mavrogiannopoulos +# ==================================================================== +# Written by Nikos Mavrogiannopoulos +# Based on e_padlock-x86_64 +# ==================================================================== # -# Author: Nikos Mavrogiannopoulos -# -# This file is part of GnuTLS. -# -# The GnuTLS is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public License -# as published by the Free Software Foundation; either version 2.1 of -# the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see . -# -# *** This file is auto-generated *** -# -.text -.globl gnutls_cpuid -.type gnutls_cpuid,@function + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output"; + +$code=".text\n"; + +($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order + ("%rdi","%rsi","%rdx","%rcx"); # Unix order + + +$code.=<<___; +.globl gnutls_cpuid +.type gnutls_cpuid,\@function .align 16 gnutls_cpuid: pushq %rbp - movq %rsp,%rbp + movq %rsp, %rbp pushq %rbx - movl %edi,-12(%rbp) - movq %rsi,-24(%rbp) - movq %rdx,-32(%rbp) - movq %rcx,-40(%rbp) - movq %r8,-48(%rbp) - movl -12(%rbp),%eax - movl %eax,-60(%rbp) - movl -60(%rbp),%eax + movl %edi, -12(%rbp) + movq %rsi, -24(%rbp) + movq %rdx, -32(%rbp) + movq %rcx, -40(%rbp) + movq %r8, -48(%rbp) + movl -12(%rbp), %eax + movl %eax, -60(%rbp) + movl -60(%rbp), %eax cpuid - movl %edx,-56(%rbp) - movl %ecx,%esi - movl %eax,-52(%rbp) - movq -24(%rbp),%rax - movl -52(%rbp),%edx - movl %edx,(%rax) - movq -32(%rbp),%rax - movl %ebx,(%rax) - movq -40(%rbp),%rax - movl %esi,(%rax) - movq -48(%rbp),%rax - movl -56(%rbp),%ecx - movl %ecx,(%rax) + movl %edx, -56(%rbp) + movl %ecx, %esi + movl %eax, -52(%rbp) + movq -24(%rbp), %rax + movl -52(%rbp), %edx + movl %edx, (%rax) + movq -32(%rbp), %rax + movl %ebx, (%rax) + movq -40(%rbp), %rax + movl %esi, (%rax) + movq -48(%rbp), %rax + movl -56(%rbp), %ecx + movl %ecx, (%rax) popq %rbx leave - .byte 0xf3,0xc3 -.size gnutls_cpuid,.-gnutls_cpuid + ret +.size gnutls_cpuid,.-gnutls_cpuid +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +print $code; + +close STDOUT; -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/e_padlock-x86.s b/lib/accelerated/x86/elf/e_padlock-x86.s index 0b8fc28810..65f32f9601 100644 --- a/lib/accelerated/x86/elf/e_padlock-x86.s +++ b/lib/accelerated/x86/elf/e_padlock-x86.s @@ -187,14 +187,16 @@ padlock_ecb_encrypt: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx + cmpl $128,%ecx + jbe .L006ecb_short testl $32,(%edx) - jnz .L006ecb_aligned + jnz .L007ecb_aligned testl $15,%edi setz %al testl $15,%esi setz %bl testl %ebx,%eax - jnz .L006ecb_aligned + jnz .L007ecb_aligned negl %eax movl $512,%ebx notl %eax @@ -206,28 +208,10 @@ padlock_ecb_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - cmpl %ebx,%ecx - ja .L007ecb_loop - movl %esi,%eax - cmpl %esp,%ebp - cmovel %edi,%eax - addl %ecx,%eax - negl %eax - andl $4095,%eax - cmpl $128,%eax - movl $-128,%eax - cmovael %ebx,%eax - andl %eax,%ebx - jz .L008ecb_unaligned_tail - jmp .L007ecb_loop + jmp .L008ecb_loop .align 16 -.L007ecb_loop: +.L008ecb_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -252,8 +236,8 @@ padlock_ecb_encrypt: testl $15,%edi jz .L010ecb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi .L010ecb_out_aligned: @@ -263,75 +247,43 @@ padlock_ecb_encrypt: addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jz .L011ecb_break - cmpl %ebx,%ecx - jae .L007ecb_loop -.L008ecb_unaligned_tail: - xorl %eax,%eax + jnz .L008ecb_loop cmpl %ebp,%esp - cmovel %ecx,%eax - subl %eax,%esp - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L007ecb_loop -.align 16 -.L011ecb_break: - cmpl %ebp,%esp - je .L012ecb_done + je .L011ecb_done pxor %xmm0,%xmm0 leal (%esp),%eax -.L013ecb_bzero: +.L012ecb_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L013ecb_bzero -.L012ecb_done: - movl 16(%ebp),%ebp + ja .L012ecb_bzero +.L011ecb_done: leal 24(%ebp),%esp - jmp .L014ecb_exit + jmp .L013ecb_exit .align 16 -.L006ecb_aligned: - leal (%esi,%ecx,1),%ebp - negl %ebp - andl $4095,%ebp +.L006ecb_short: xorl %eax,%eax - cmpl $128,%ebp - movl $127,%ebp - cmovael %eax,%ebp - andl %ecx,%ebp - subl %ebp,%ecx - jz .L015ecb_aligned_tail + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L014ecb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L014ecb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L008ecb_loop +.align 16 +.L007ecb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,200 - testl %ebp,%ebp - jz .L014ecb_exit -.L015ecb_aligned_tail: - movl %ebp,%ecx - leal -24(%esp),%ebp - movl %ebp,%esp - movl %ebp,%eax - subl %ecx,%esp - andl $-16,%ebp - andl $-16,%esp - movl %eax,16(%ebp) - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L007ecb_loop -.L014ecb_exit: +.L013ecb_exit: movl $1,%eax leal 4(%esp),%esp .L004ecb_abort: @@ -355,17 +307,19 @@ padlock_cbc_encrypt: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz .L016cbc_abort + jnz .L015cbc_abort testl $15,%ecx - jnz .L016cbc_abort - leal .Lpadlock_saved_context-.L017cbc_pic_point,%eax + jnz .L015cbc_abort + leal .Lpadlock_saved_context-.L016cbc_pic_point,%eax pushfl cld call _padlock_verify_ctx -.L017cbc_pic_point: +.L016cbc_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx + cmpl $64,%ecx + jbe .L017cbc_short testl $32,(%edx) jnz .L018cbc_aligned testl $15,%edi @@ -385,25 +339,7 @@ padlock_cbc_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - cmpl %ebx,%ecx - ja .L019cbc_loop - movl %esi,%eax - cmpl %esp,%ebp - cmovel %edi,%eax - addl %ecx,%eax - negl %eax - andl $4095,%eax - cmpl $64,%eax - movl $-64,%eax - cmovael %ebx,%eax - andl %eax,%ebx - jz .L020cbc_unaligned_tail jmp .L019cbc_loop .align 16 .L019cbc_loop: @@ -415,13 +351,13 @@ padlock_cbc_encrypt: testl $15,%edi cmovnzl %esp,%edi testl $15,%esi - jz .L021cbc_inp_aligned + jz .L020cbc_inp_aligned shrl $2,%ecx .byte 243,165 subl %ebx,%edi movl %ebx,%ecx movl %edi,%esi -.L021cbc_inp_aligned: +.L020cbc_inp_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx @@ -431,93 +367,61 @@ padlock_cbc_encrypt: movl (%ebp),%edi movl 12(%ebp),%ebx testl $15,%edi - jz .L022cbc_out_aligned + jz .L021cbc_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi -.L022cbc_out_aligned: +.L021cbc_out_aligned: movl 4(%ebp),%esi movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jz .L023cbc_break - cmpl %ebx,%ecx - jae .L019cbc_loop -.L020cbc_unaligned_tail: - xorl %eax,%eax - cmpl %ebp,%esp - cmovel %ecx,%eax - subl %eax,%esp - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L019cbc_loop -.align 16 -.L023cbc_break: + jnz .L019cbc_loop cmpl %ebp,%esp - je .L024cbc_done + je .L022cbc_done pxor %xmm0,%xmm0 leal (%esp),%eax -.L025cbc_bzero: +.L023cbc_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L025cbc_bzero -.L024cbc_done: - movl 16(%ebp),%ebp + ja .L023cbc_bzero +.L022cbc_done: leal 24(%ebp),%esp - jmp .L026cbc_exit + jmp .L024cbc_exit .align 16 -.L018cbc_aligned: - leal (%esi,%ecx,1),%ebp - negl %ebp - andl $4095,%ebp +.L017cbc_short: xorl %eax,%eax - cmpl $64,%ebp - movl $63,%ebp - cmovael %eax,%ebp - andl %ecx,%ebp - subl %ebp,%ecx - jz .L027cbc_aligned_tail + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L025cbc_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L025cbc_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L019cbc_loop +.align 16 +.L018cbc_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,208 movaps (%eax),%xmm0 movaps %xmm0,-16(%edx) - testl %ebp,%ebp - jz .L026cbc_exit -.L027cbc_aligned_tail: - movl %ebp,%ecx - leal -24(%esp),%ebp - movl %ebp,%esp - movl %ebp,%eax - subl %ecx,%esp - andl $-16,%ebp - andl $-16,%esp - movl %eax,16(%ebp) - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp .L019cbc_loop -.L026cbc_exit: +.L024cbc_exit: movl $1,%eax leal 4(%esp),%esp -.L016cbc_abort: +.L015cbc_abort: popl %edi popl %esi popl %ebx @@ -538,25 +442,25 @@ padlock_cfb_encrypt: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz .L028cfb_abort + jnz .L026cfb_abort testl $15,%ecx - jnz .L028cfb_abort - leal .Lpadlock_saved_context-.L029cfb_pic_point,%eax + jnz .L026cfb_abort + leal .Lpadlock_saved_context-.L027cfb_pic_point,%eax pushfl cld call _padlock_verify_ctx -.L029cfb_pic_point: +.L027cfb_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx testl $32,(%edx) - jnz .L030cfb_aligned + jnz .L028cfb_aligned testl $15,%edi setz %al testl $15,%esi setz %bl testl %ebx,%eax - jnz .L030cfb_aligned + jnz .L028cfb_aligned negl %eax movl $512,%ebx notl %eax @@ -568,15 +472,10 @@ padlock_cfb_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - jmp .L031cfb_loop + jmp .L029cfb_loop .align 16 -.L031cfb_loop: +.L029cfb_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -585,13 +484,13 @@ padlock_cfb_encrypt: testl $15,%edi cmovnzl %esp,%edi testl $15,%esi - jz .L032cfb_inp_aligned + jz .L030cfb_inp_aligned shrl $2,%ecx .byte 243,165 subl %ebx,%edi movl %ebx,%ecx movl %edi,%esi -.L032cfb_inp_aligned: +.L030cfb_inp_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx @@ -601,45 +500,61 @@ padlock_cfb_encrypt: movl (%ebp),%edi movl 12(%ebp),%ebx testl $15,%edi - jz .L033cfb_out_aligned + jz .L031cfb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi -.L033cfb_out_aligned: +.L031cfb_out_aligned: movl 4(%ebp),%esi movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jnz .L031cfb_loop + jnz .L029cfb_loop cmpl %ebp,%esp - je .L034cfb_done + je .L032cfb_done pxor %xmm0,%xmm0 leal (%esp),%eax -.L035cfb_bzero: +.L033cfb_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L035cfb_bzero -.L034cfb_done: - movl 16(%ebp),%ebp + ja .L033cfb_bzero +.L032cfb_done: leal 24(%ebp),%esp - jmp .L036cfb_exit + jmp .L034cfb_exit .align 16 -.L030cfb_aligned: +.L035cfb_short: + xorl %eax,%eax + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L036cfb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L036cfb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L029cfb_loop +.align 16 +.L028cfb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,224 movaps (%eax),%xmm0 movaps %xmm0,-16(%edx) -.L036cfb_exit: +.L034cfb_exit: movl $1,%eax leal 4(%esp),%esp -.L028cfb_abort: +.L026cfb_abort: popl %edi popl %esi popl %ebx @@ -690,12 +605,7 @@ padlock_ofb_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) jmp .L040ofb_loop .align 16 .L040ofb_loop: @@ -725,8 +635,8 @@ padlock_ofb_encrypt: testl $15,%edi jz .L042ofb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi .L042ofb_out_aligned: @@ -747,10 +657,26 @@ padlock_ofb_encrypt: cmpl %eax,%ebp ja .L044ofb_bzero .L043ofb_done: - movl 16(%ebp),%ebp leal 24(%ebp),%esp jmp .L045ofb_exit .align 16 +.L046ofb_short: + xorl %eax,%eax + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +.L047ofb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja .L047ofb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp .L040ofb_loop +.align 16 .L039ofb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx @@ -782,14 +708,14 @@ padlock_ctr32_encrypt: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz .L046ctr32_abort + jnz .L048ctr32_abort testl $15,%ecx - jnz .L046ctr32_abort - leal .Lpadlock_saved_context-.L047ctr32_pic_point,%eax + jnz .L048ctr32_abort + leal .Lpadlock_saved_context-.L049ctr32_pic_point,%eax pushfl cld call _padlock_verify_ctx -.L047ctr32_pic_point: +.L049ctr32_pic_point: leal 16(%edx),%edx xorl %eax,%eax movq -16(%edx),%mm0 @@ -803,15 +729,10 @@ padlock_ctr32_encrypt: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - jmp .L048ctr32_loop + jmp .L050ctr32_loop .align 16 -.L048ctr32_loop: +.L050ctr32_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -820,7 +741,7 @@ padlock_ctr32_encrypt: movl -4(%edx),%ecx xorl %edi,%edi movl -8(%edx),%eax -.L049ctr32_prepare: +.L051ctr32_prepare: movl %ecx,12(%esp,%edi,1) bswap %ecx movq %mm0,(%esp,%edi,1) @@ -829,7 +750,7 @@ padlock_ctr32_encrypt: bswap %ecx leal 16(%edi),%edi cmpl %ebx,%edi - jb .L049ctr32_prepare + jb .L051ctr32_prepare movl %ecx,-4(%edx) leal (%esp),%esi leal (%esp),%edi @@ -842,33 +763,32 @@ padlock_ctr32_encrypt: movl 12(%ebp),%ebx movl 4(%ebp),%esi xorl %ecx,%ecx -.L050ctr32_xor: +.L052ctr32_xor: movups (%esi,%ecx,1),%xmm1 leal 16(%ecx),%ecx pxor -16(%esp,%ecx,1),%xmm1 movups %xmm1,-16(%edi,%ecx,1) cmpl %ebx,%ecx - jb .L050ctr32_xor + jb .L052ctr32_xor movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jnz .L048ctr32_loop + jnz .L050ctr32_loop pxor %xmm0,%xmm0 leal (%esp),%eax -.L051ctr32_bzero: +.L053ctr32_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja .L051ctr32_bzero -.L052ctr32_done: - movl 16(%ebp),%ebp + ja .L053ctr32_bzero +.L054ctr32_done: leal 24(%ebp),%esp movl $1,%eax leal 4(%esp),%esp emms -.L046ctr32_abort: +.L048ctr32_abort: popl %edi popl %esi popl %ebx @@ -894,10 +814,10 @@ _win32_segv_handler: movl 4(%esp),%edx movl 12(%esp),%ecx cmpl $3221225477,(%edx) - jne .L053ret + jne .L055ret addl $4,184(%ecx) movl $0,%eax -.L053ret: +.L055ret: ret .size _win32_segv_handler,.-_win32_segv_handler .globl padlock_sha1_oneshot diff --git a/lib/accelerated/x86/elf/e_padlock-x86_64.s b/lib/accelerated/x86/elf/e_padlock-x86_64.s index 1f86a997a8..014ecaf569 100644 --- a/lib/accelerated/x86/elf/e_padlock-x86_64.s +++ b/lib/accelerated/x86/elf/e_padlock-x86_64.s @@ -127,7 +127,7 @@ padlock_aes_block: movq $1,%rcx leaq 32(%rdx),%rbx leaq 16(%rdx),%rdx -.byte 0xf3,0x0f,0xa7,0xc8 +.byte 0xf3,0x0f,0xa7,0xc8 movq %r8,%rbx .byte 0xf3,0xc3 .size padlock_aes_block,.-padlock_aes_block @@ -137,7 +137,7 @@ padlock_aes_block: .align 16 padlock_xstore: movl %esi,%edx -.byte 0x0f,0xa7,0xc0 +.byte 0x0f,0xa7,0xc0 .byte 0xf3,0xc3 .size padlock_xstore,.-padlock_xstore @@ -154,7 +154,7 @@ padlock_sha1_oneshot: movq %rsp,%rdi movl %eax,16(%rsp) xorq %rax,%rax -.byte 0xf3,0x0f,0xa6,0xc8 +.byte 0xf3,0x0f,0xa6,0xc8 movaps (%rsp),%xmm0 movl 16(%rsp),%eax addq $128+8,%rsp @@ -176,7 +176,7 @@ padlock_sha1_blocks: movq %rsp,%rdi movl %eax,16(%rsp) movq $-1,%rax -.byte 0xf3,0x0f,0xa6,0xc8 +.byte 0xf3,0x0f,0xa6,0xc8 movaps (%rsp),%xmm0 movl 16(%rsp),%eax addq $128+8,%rsp @@ -198,7 +198,7 @@ padlock_sha256_oneshot: movq %rsp,%rdi movaps %xmm1,16(%rsp) xorq %rax,%rax -.byte 0xf3,0x0f,0xa6,0xd0 +.byte 0xf3,0x0f,0xa6,0xd0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 addq $128+8,%rsp @@ -220,7 +220,7 @@ padlock_sha256_blocks: movq %rsp,%rdi movaps %xmm1,16(%rsp) movq $-1,%rax -.byte 0xf3,0x0f,0xa6,0xd0 +.byte 0xf3,0x0f,0xa6,0xd0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 addq $128+8,%rsp @@ -245,7 +245,7 @@ padlock_sha512_blocks: movaps %xmm1,16(%rsp) movaps %xmm2,32(%rsp) movaps %xmm3,48(%rsp) -.byte 0xf3,0x0f,0xa6,0xe0 +.byte 0xf3,0x0f,0xa6,0xe0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movaps 32(%rsp),%xmm2 @@ -276,6 +276,8 @@ padlock_ecb_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $128,%rcx + jbe .Lecb_short testl $32,(%rdx) jnz .Lecb_aligned testq $15,%rdi @@ -295,21 +297,6 @@ padlock_ecb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx - cmpq %rbx,%rcx - ja .Lecb_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $128,%rax - movq $-128,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz .Lecb_unaligned_tail jmp .Lecb_loop .align 16 .Lecb_loop: @@ -325,7 +312,7 @@ padlock_ecb_encrypt: testq $15,%rsi jz .Lecb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -333,15 +320,15 @@ padlock_ecb_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,200 +.byte 0xf3,0x0f,0xa7,200 movq %r8,%rdi movq %r11,%rbx testq $15,%rdi jz .Lecb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lecb_out_aligned: movq %r9,%rsi @@ -350,26 +337,9 @@ padlock_ecb_encrypt: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz .Lecb_break - cmpq %rbx,%rcx - jae .Lecb_loop -.Lecb_unaligned_tail: - xorl %eax,%eax + jnz .Lecb_loop + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp .Lecb_loop -.align 16 -.Lecb_break: - cmpq %rbp,%rsp je .Lecb_done pxor %xmm0,%xmm0 @@ -383,39 +353,26 @@ padlock_ecb_encrypt: .Lecb_done: leaq (%rbp),%rsp jmp .Lecb_exit - +.align 16 +.Lecb_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +.Lecb_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja .Lecb_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp .Lecb_loop .align 16 .Lecb_aligned: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $128,%rbp - movq $128-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz .Lecb_aligned_tail leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,200 - testq %rbp,%rbp - jz .Lecb_exit - -.Lecb_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp .Lecb_loop +.byte 0xf3,0x0f,0xa7,200 .Lecb_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -443,6 +400,8 @@ padlock_cbc_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $64,%rcx + jbe .Lcbc_short testl $32,(%rdx) jnz .Lcbc_aligned testq $15,%rdi @@ -462,21 +421,6 @@ padlock_cbc_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx - cmpq %rbx,%rcx - ja .Lcbc_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $64,%rax - movq $-64,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz .Lcbc_unaligned_tail jmp .Lcbc_loop .align 16 .Lcbc_loop: @@ -492,7 +436,7 @@ padlock_cbc_encrypt: testq $15,%rsi jz .Lcbc_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -500,7 +444,7 @@ padlock_cbc_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,208 +.byte 0xf3,0x0f,0xa7,208 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -508,9 +452,9 @@ padlock_cbc_encrypt: testq $15,%rdi jz .Lcbc_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lcbc_out_aligned: movq %r9,%rsi @@ -519,26 +463,9 @@ padlock_cbc_encrypt: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz .Lcbc_break - cmpq %rbx,%rcx - jae .Lcbc_loop -.Lcbc_unaligned_tail: - xorl %eax,%eax + jnz .Lcbc_loop + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp .Lcbc_loop -.align 16 -.Lcbc_break: - cmpq %rbp,%rsp je .Lcbc_done pxor %xmm0,%xmm0 @@ -552,41 +479,28 @@ padlock_cbc_encrypt: .Lcbc_done: leaq (%rbp),%rsp jmp .Lcbc_exit - +.align 16 +.Lcbc_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +.Lcbc_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja .Lcbc_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp .Lcbc_loop .align 16 .Lcbc_aligned: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $64,%rbp - movq $64-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz .Lcbc_aligned_tail leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,208 +.byte 0xf3,0x0f,0xa7,208 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) - testq %rbp,%rbp - jz .Lcbc_exit - -.Lcbc_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp .Lcbc_loop .Lcbc_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -633,8 +547,6 @@ padlock_cfb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx jmp .Lcfb_loop .align 16 .Lcfb_loop: @@ -650,7 +562,7 @@ padlock_cfb_encrypt: testq $15,%rsi jz .Lcfb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -658,7 +570,7 @@ padlock_cfb_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,224 +.byte 0xf3,0x0f,0xa7,224 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -666,9 +578,9 @@ padlock_cfb_encrypt: testq $15,%rdi jz .Lcfb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lcfb_out_aligned: movq %r9,%rsi @@ -678,7 +590,8 @@ padlock_cfb_encrypt: subq %rbx,%rcx movq $512,%rbx jnz .Lcfb_loop - cmpq %rbp,%rsp + + cmpq %rsp,%rbp je .Lcfb_done pxor %xmm0,%xmm0 @@ -692,13 +605,12 @@ padlock_cfb_encrypt: .Lcfb_done: leaq (%rbp),%rsp jmp .Lcfb_exit - .align 16 .Lcfb_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,224 +.byte 0xf3,0x0f,0xa7,224 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) .Lcfb_exit: @@ -747,8 +659,6 @@ padlock_ofb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx jmp .Lofb_loop .align 16 .Lofb_loop: @@ -764,7 +674,7 @@ padlock_ofb_encrypt: testq $15,%rsi jz .Lofb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -772,7 +682,7 @@ padlock_ofb_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,232 +.byte 0xf3,0x0f,0xa7,232 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -780,9 +690,9 @@ padlock_ofb_encrypt: testq $15,%rdi jz .Lofb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lofb_out_aligned: movq %r9,%rsi @@ -792,7 +702,8 @@ padlock_ofb_encrypt: subq %rbx,%rcx movq $512,%rbx jnz .Lofb_loop - cmpq %rbp,%rsp + + cmpq %rsp,%rbp je .Lofb_done pxor %xmm0,%xmm0 @@ -806,13 +717,12 @@ padlock_ofb_encrypt: .Lofb_done: leaq (%rbp),%rsp jmp .Lofb_exit - .align 16 .Lofb_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,232 +.byte 0xf3,0x0f,0xa7,232 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) .Lofb_exit: @@ -842,6 +752,8 @@ padlock_ctr32_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $64,%rcx + jbe .Lctr32_short testl $32,(%rdx) jnz .Lctr32_aligned testq $15,%rdi @@ -861,32 +773,15 @@ padlock_ctr32_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx .Lctr32_reenter: movl -4(%rdx),%eax bswapl %eax negl %eax andl $31,%eax - movq $512,%rbx + jz .Lctr32_loop shll $4,%eax - cmovzq %rbx,%rax cmpq %rax,%rcx cmovaq %rax,%rbx - cmovbeq %rcx,%rbx - cmpq %rbx,%rcx - ja .Lctr32_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $32,%rax - movq $-32,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz .Lctr32_unaligned_tail jmp .Lctr32_loop .align 16 .Lctr32_loop: @@ -902,7 +797,7 @@ padlock_ctr32_encrypt: testq $15,%rsi jz .Lctr32_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -910,23 +805,23 @@ padlock_ctr32_encrypt: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 +.byte 0xf3,0x0f,0xa7,216 movl -4(%rdx),%eax testl $4294901760,%eax - jnz .Lctr32_no_carry + jnz .Lctr32_no_corr bswapl %eax addl $65536,%eax bswapl %eax movl %eax,-4(%rdx) -.Lctr32_no_carry: +.Lctr32_no_corr: movq %r8,%rdi movq %r11,%rbx testq $15,%rdi jz .Lctr32_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi .Lctr32_out_aligned: movq %r9,%rsi @@ -935,38 +830,9 @@ padlock_ctr32_encrypt: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz .Lctr32_break - cmpq %rbx,%rcx - jae .Lctr32_loop - movq %rcx,%rbx - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $32,%rax - movq $-32,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx jnz .Lctr32_loop -.Lctr32_unaligned_tail: - xorl %eax,%eax + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp .Lctr32_loop -.align 16 -.Lctr32_break: - cmpq %rbp,%rsp je .Lctr32_done pxor %xmm0,%xmm0 @@ -980,75 +846,56 @@ padlock_ctr32_encrypt: .Lctr32_done: leaq (%rbp),%rsp jmp .Lctr32_exit - +.align 16 +.Lctr32_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +.Lctr32_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja .Lctr32_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp .Lctr32_reenter .align 16 .Lctr32_aligned: movl -4(%rdx),%eax + movq $1048576,%rbx bswapl %eax + cmpq %rcx,%rbx + cmovaq %rcx,%rbx negl %eax andl $65535,%eax - movq $1048576,%rbx + jz .Lctr32_aligned_loop shll $4,%eax - cmovzq %rbx,%rax cmpq %rax,%rcx cmovaq %rax,%rbx - cmovbeq %rcx,%rbx - jbe .Lctr32_aligned_skip - + jmp .Lctr32_aligned_loop +.align 16 .Lctr32_aligned_loop: + cmpq %rcx,%rbx + cmovaq %rcx,%rbx movq %rcx,%r10 movq %rbx,%rcx movq %rbx,%r11 - leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 - +.byte 0xf3,0x0f,0xa7,216 movl -4(%rdx),%eax bswapl %eax addl $65536,%eax bswapl %eax movl %eax,-4(%rdx) + movq %r11,%rbx movq %r10,%rcx - subq %r11,%rcx + subq %rbx,%rcx movq $1048576,%rbx - jz .Lctr32_exit - cmpq %rbx,%rcx - jae .Lctr32_aligned_loop - -.Lctr32_aligned_skip: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $32,%rbp - movq $32-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz .Lctr32_aligned_tail - leaq -16(%rdx),%rax - leaq 16(%rdx),%rbx - shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 - testq %rbp,%rbp - jz .Lctr32_exit - -.Lctr32_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp .Lctr32_loop + jnz .Lctr32_aligned_loop .Lctr32_exit: movl $1,%eax leaq 8(%rsp),%rsp diff --git a/lib/accelerated/x86/elf/ghash-x86_64.s b/lib/accelerated/x86/elf/ghash-x86_64.s index a2c26f955d..f60c95b546 100644 --- a/lib/accelerated/x86/elf/ghash-x86_64.s +++ b/lib/accelerated/x86/elf/ghash-x86_64.s @@ -697,7 +697,6 @@ gcm_ghash_4bit: .type gcm_init_clmul,@function .align 16 gcm_init_clmul: -.L_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -716,15 +715,15 @@ gcm_init_clmul: pxor %xmm5,%xmm2 - pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 - pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -734,134 +733,44 @@ gcm_init_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - pshufd $78,%xmm2,%xmm3 - pshufd $78,%xmm0,%xmm4 - pxor %xmm2,%xmm3 - movdqu %xmm2,0(%rdi) - pxor %xmm0,%xmm4 - movdqu %xmm0,16(%rdi) -.byte 102,15,58,15,227,8 - movdqu %xmm4,32(%rdi) - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,15,58,68,194,0 -.byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - movdqa %xmm0,%xmm5 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,15,58,68,194,0 -.byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 + pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - pshufd $78,%xmm5,%xmm3 - pshufd $78,%xmm0,%xmm4 - pxor %xmm5,%xmm3 - movdqu %xmm5,48(%rdi) - pxor %xmm0,%xmm4 - movdqu %xmm0,64(%rdi) -.byte 102,15,58,15,227,8 - movdqu %xmm4,80(%rdi) + movdqu %xmm2,(%rdi) + movdqu %xmm0,16(%rdi) .byte 0xf3,0xc3 .size gcm_init_clmul,.-gcm_init_clmul .globl gcm_gmult_clmul .type gcm_gmult_clmul,@function .align 16 gcm_gmult_clmul: -.L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 movdqu (%rsi),%xmm2 - movdqu 32(%rsi),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 @@ -874,358 +783,186 @@ gcm_gmult_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .size gcm_gmult_clmul,.-gcm_gmult_clmul .globl gcm_ghash_clmul .type gcm_ghash_clmul,@function -.align 32 +.align 16 gcm_ghash_clmul: -.L_ghash_clmul: movdqa .Lbswap_mask(%rip),%xmm5 - movq $11547335547999543296,%rax movdqu (%rdi),%xmm0 movdqu (%rsi),%xmm2 - movdqu 32(%rsi),%xmm10 .byte 102,15,56,0,197 subq $16,%rcx jz .Lodd_tail - movdqu 16(%rsi),%xmm9 - cmpq $48,%rcx - jb .Lskip4x + movdqu 16(%rsi),%xmm8 - subq $48,%rcx - movdqu 48(%rsi),%xmm14 - movdqu 64(%rsi),%xmm15 - movdqu 48(%rdx),%xmm6 - movdqu 32(%rdx),%xmm11 -.byte 102,15,56,0,245 -.byte 102,68,15,56,0,221 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 - pxor %xmm6,%xmm7 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,250,0 - - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,217,0 -.byte 102,69,15,58,68,233,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,16 - xorps %xmm13,%xmm8 - movups 80(%rsi),%xmm10 - xorps %xmm12,%xmm7 - - movdqu 16(%rdx),%xmm11 - movdqu 0(%rdx),%xmm3 -.byte 102,68,15,56,0,221 + movdqu (%rdx),%xmm3 + movdqu 16(%rdx),%xmm6 .byte 102,15,56,0,221 - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 - pxor %xmm3,%xmm0 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,222,0 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - - leaq 64(%rdx),%rdx - subq $64,%rcx - jc .Ltail4x - - jmp .Lmod4_loop -.align 32 -.Lmod4_loop: -.byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 - movdqu 48(%rdx),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 - movdqu 32(%rdx),%xmm6 - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 - pxor %xmm11,%xmm12 .byte 102,15,56,0,245 - movups 32(%rsi),%xmm10 -.byte 102,68,15,58,68,218,0 - xorps %xmm7,%xmm3 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 - - pxor %xmm0,%xmm3 - pxor %xmm6,%xmm7 - pxor %xmm1,%xmm3 - movdqa %xmm3,%xmm4 - pslldq $8,%xmm3 -.byte 102,68,15,58,68,234,17 - psrldq $8,%xmm4 pxor %xmm3,%xmm0 - movdqa .L7_mask(%rip),%xmm3 - pxor %xmm4,%xmm1 -.byte 102,72,15,110,224 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 - pand %xmm0,%xmm3 -.byte 102,15,56,0,227 -.byte 102,69,15,58,68,226,0 - pxor %xmm0,%xmm4 - psllq $57,%xmm4 - movdqa %xmm4,%xmm3 - pslldq $8,%xmm4 -.byte 102,65,15,58,68,241,0 + movdqa %xmm3,%xmm4 psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - movdqu 0(%rdx),%xmm3 - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 -.byte 102,69,15,58,68,193,17 - xorps %xmm11,%xmm6 - movdqu 16(%rdx),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,250,16 - xorps %xmm13,%xmm8 - movups 80(%rsi),%xmm10 -.byte 102,15,56,0,221 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - - movdqa %xmm11,%xmm13 - pxor %xmm12,%xmm7 - pshufd $78,%xmm11,%xmm12 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,222,0 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - psrlq $1,%xmm0 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 - pxor %xmm1,%xmm0 - -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 - leaq 64(%rdx),%rdx - subq $64,%rcx - jnc .Lmod4_loop - -.Ltail4x: -.byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 -.byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 - pxor %xmm0,%xmm1 - pxor %xmm7,%xmm3 + leaq 32(%rdx),%rdx + subq $32,%rcx + jbe .Leven_tail +.Lmod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 - pxor %xmm0,%xmm1 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - addq $64,%rcx - jz .Ldone - movdqu 32(%rsi),%xmm10 - subq $16,%rcx - jz .Lodd_tail -.Lskip4x: - - - - - movdqu (%rdx),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + movdqu 16(%rdx),%xmm6 .byte 102,15,56,0,221 .byte 102,15,56,0,245 - pxor %xmm3,%xmm0 - - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm3 - pxor %xmm6,%xmm3 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,218,0 - - leaq 32(%rdx),%rdx - subq $32,%rcx - jbe .Leven_tail - jmp .Lmod_loop -.align 32 -.Lmod_loop: - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm4 - pxor %xmm0,%xmm4 - -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 - - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 - movdqu (%rdx),%xmm8 -.byte 102,68,15,56,0,197 - movdqu 16(%rdx),%xmm6 - - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - pxor %xmm8,%xmm1 - pxor %xmm3,%xmm4 -.byte 102,15,56,0,245 - movdqa %xmm4,%xmm3 - psrldq $8,%xmm3 - pslldq $8,%xmm4 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - movdqa %xmm6,%xmm8 - - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 -.byte 102,15,58,68,242,0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - pshufd $78,%xmm8,%xmm3 - pxor %xmm8,%xmm3 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 -.byte 102,68,15,58,68,194,17 +.byte 102,15,58,68,250,17 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 -.byte 102,65,15,58,68,218,0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 leaq 32(%rdx),%rdx subq $32,%rcx ja .Lmod_loop .Leven_tail: - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm4 - pxor %xmm0,%xmm4 - -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 - - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 - pxor %xmm3,%xmm4 - movdqa %xmm4,%xmm3 + + movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 testq %rcx,%rcx jnz .Ldone @@ -1235,10 +972,12 @@ gcm_ghash_clmul: pxor %xmm3,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,65,15,58,68,218,0 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -1248,60 +987,38 @@ gcm_ghash_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 .Ldone: .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 +.LSEH_end_gcm_ghash_clmul: .size gcm_ghash_clmul,.-gcm_ghash_clmul -.globl gcm_init_avx -.type gcm_init_avx,@function -.align 32 -gcm_init_avx: - jmp .L_init_clmul -.size gcm_init_avx,.-gcm_init_avx -.globl gcm_gmult_avx -.type gcm_gmult_avx,@function -.align 32 -gcm_gmult_avx: - jmp .L_gmult_clmul -.size gcm_gmult_avx,.-gcm_gmult_avx -.globl gcm_ghash_avx -.type gcm_ghash_avx,@function -.align 32 -gcm_ghash_avx: - jmp .L_ghash_clmul -.size gcm_ghash_avx,.-gcm_ghash_avx .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 -.L7_mask: -.long 7,0,7,0 -.L7_mask_poly: -.long 7,0,450,0 .align 64 .type .Lrem_4bit,@object .Lrem_4bit: diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s index 583bd46a86..3bb65a54e9 100644 --- a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s +++ b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s @@ -46,7 +46,6 @@ sha1_block_data_order: movl _gnutls_x86_cpuid_s+0(%rip),%r9d movl _gnutls_x86_cpuid_s+4(%rip),%r8d - movl _gnutls_x86_cpuid_s+8(%rip),%r10d testl $512,%r8d jz .Lialu jmp _ssse3_shortcut @@ -1342,7 +1341,7 @@ _ssse3_shortcut: shlq $6,%r10 addq %r9,%r10 - leaq K_XX_XX+64(%rip),%r11 + leaq K_XX_XX(%rip),%r11 movl 0(%r8),%eax movl 4(%r8),%ebx @@ -1350,12 +1349,9 @@ _ssse3_shortcut: movl 12(%r8),%edx movl %ebx,%esi movl 16(%r8),%ebp - movl %ecx,%edi - xorl %edx,%edi - andl %edi,%esi movdqa 64(%r11),%xmm6 - movdqa -64(%r11),%xmm9 + movdqa 0(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -1378,881 +1374,903 @@ _ssse3_shortcut: .align 16 .Loop_ssse3: movdqa %xmm1,%xmm4 - rorl $2,%ebx - xorl %edx,%esi + addl 0(%rsp),%ebp + xorl %edx,%ecx movdqa %xmm3,%xmm8 .byte 102,15,58,15,224,8 movl %eax,%edi - addl 0(%rsp),%ebp - paddd %xmm3,%xmm9 - xorl %ecx,%ebx roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx psrldq $4,%xmm8 - addl %esi,%ebp - andl %ebx,%edi - pxor %xmm0,%xmm4 - xorl %ecx,%ebx + xorl %edx,%esi addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp pxor %xmm2,%xmm8 - rorl $7,%eax - xorl %ecx,%edi - movl %ebp,%esi addl 4(%rsp),%edx - pxor %xmm8,%xmm4 - xorl %ebx,%eax + xorl %ecx,%ebx + movl %ebp,%esi roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx movdqa %xmm9,48(%rsp) - addl %edi,%edx - andl %eax,%esi + xorl %ecx,%edi + addl %ebp,%edx movdqa %xmm4,%xmm10 movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx xorl %ebx,%eax - addl %ebp,%edx - rorl $7,%ebp - xorl %ebx,%esi pslldq $12,%xmm10 paddd %xmm4,%xmm4 movl %edx,%edi - addl 8(%rsp),%ecx - xorl %eax,%ebp roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax psrld $31,%xmm8 - addl %esi,%ecx - andl %ebp,%edi - movdqa %xmm10,%xmm9 - xorl %eax,%ebp + xorl %ebx,%esi addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx psrld $30,%xmm10 por %xmm8,%xmm4 - rorl $7,%edx - xorl %eax,%edi - movl %ecx,%esi addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx pslld $2,%xmm9 pxor %xmm10,%xmm4 - xorl %ebp,%edx - roll $5,%ecx - movdqa -64(%r11),%xmm10 - addl %edi,%ebx - andl %edx,%esi - pxor %xmm9,%xmm4 - xorl %ebp,%edx + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx movdqa %xmm2,%xmm5 - rorl $7,%ecx - xorl %ebp,%esi + addl 16(%rsp),%eax + xorl %ebp,%edx movdqa %xmm4,%xmm9 .byte 102,15,58,15,233,8 movl %ebx,%edi - addl 16(%rsp),%eax - paddd %xmm4,%xmm10 - xorl %edx,%ecx roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx psrldq $4,%xmm9 - addl %esi,%eax - andl %ecx,%edi - pxor %xmm1,%xmm5 - xorl %edx,%ecx + xorl %ebp,%esi addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax pxor %xmm3,%xmm9 - rorl $7,%ebx - xorl %edx,%edi - movl %eax,%esi addl 20(%rsp),%ebp - pxor %xmm9,%xmm5 - xorl %ecx,%ebx + xorl %edx,%ecx + movl %eax,%esi roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx movdqa %xmm10,0(%rsp) - addl %edi,%ebp - andl %ebx,%esi + xorl %edx,%edi + addl %eax,%ebp movdqa %xmm5,%xmm8 movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx xorl %ecx,%ebx - addl %eax,%ebp - rorl $7,%eax - xorl %ecx,%esi pslldq $12,%xmm8 paddd %xmm5,%xmm5 movl %ebp,%edi - addl 24(%rsp),%edx - xorl %ebx,%eax roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx psrld $31,%xmm9 - addl %esi,%edx - andl %eax,%edi - movdqa %xmm8,%xmm10 - xorl %ebx,%eax + xorl %ecx,%esi addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx psrld $30,%xmm8 por %xmm9,%xmm5 - rorl $7,%ebp - xorl %ebx,%edi - movl %edx,%esi addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx pslld $2,%xmm10 pxor %xmm8,%xmm5 - xorl %eax,%ebp - roll $5,%edx - movdqa -32(%r11),%xmm8 - addl %edi,%ecx - andl %ebp,%esi - pxor %xmm10,%xmm5 - xorl %eax,%ebp + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx movdqa %xmm3,%xmm6 - rorl $7,%edx - xorl %eax,%esi + addl 32(%rsp),%ebx + xorl %eax,%ebp movdqa %xmm5,%xmm10 .byte 102,15,58,15,242,8 movl %ecx,%edi - addl 32(%rsp),%ebx - paddd %xmm5,%xmm8 - xorl %ebp,%edx roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp psrldq $4,%xmm10 - addl %esi,%ebx - andl %edx,%edi - pxor %xmm2,%xmm6 - xorl %ebp,%edx + xorl %eax,%esi addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx pxor %xmm4,%xmm10 - rorl $7,%ecx - xorl %ebp,%edi - movl %ebx,%esi addl 36(%rsp),%eax - pxor %xmm10,%xmm6 - xorl %edx,%ecx + xorl %ebp,%edx + movl %ebx,%esi roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx movdqa %xmm8,16(%rsp) - addl %edi,%eax - andl %ecx,%esi + xorl %ebp,%edi + addl %ebx,%eax movdqa %xmm6,%xmm9 movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp xorl %edx,%ecx - addl %ebx,%eax - rorl $7,%ebx - xorl %edx,%esi pslldq $12,%xmm9 paddd %xmm6,%xmm6 movl %eax,%edi - addl 40(%rsp),%ebp - xorl %ecx,%ebx roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx psrld $31,%xmm10 - addl %esi,%ebp - andl %ebx,%edi - movdqa %xmm9,%xmm8 - xorl %ecx,%ebx + xorl %edx,%esi addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp psrld $30,%xmm9 por %xmm10,%xmm6 - rorl $7,%eax - xorl %ecx,%edi - movl %ebp,%esi addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp pslld $2,%xmm8 pxor %xmm9,%xmm6 - xorl %ebx,%eax - roll $5,%ebp - movdqa -32(%r11),%xmm9 - addl %edi,%edx - andl %eax,%esi - pxor %xmm8,%xmm6 - xorl %ebx,%eax + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx movdqa %xmm4,%xmm7 - rorl $7,%ebp - xorl %ebx,%esi + addl 48(%rsp),%ecx + xorl %ebx,%eax movdqa %xmm6,%xmm8 .byte 102,15,58,15,251,8 movl %edx,%edi - addl 48(%rsp),%ecx - paddd %xmm6,%xmm9 - xorl %eax,%ebp roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax psrldq $4,%xmm8 - addl %esi,%ecx - andl %ebp,%edi - pxor %xmm3,%xmm7 - xorl %eax,%ebp + xorl %ebx,%esi addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx pxor %xmm5,%xmm8 - rorl $7,%edx - xorl %eax,%edi - movl %ecx,%esi addl 52(%rsp),%ebx - pxor %xmm8,%xmm7 - xorl %ebp,%edx + xorl %eax,%ebp + movl %ecx,%esi roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp movdqa %xmm9,32(%rsp) - addl %edi,%ebx - andl %edx,%esi + xorl %eax,%edi + addl %ecx,%ebx movdqa %xmm7,%xmm10 movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax xorl %ebp,%edx - addl %ecx,%ebx - rorl $7,%ecx - xorl %ebp,%esi pslldq $12,%xmm10 paddd %xmm7,%xmm7 movl %ebx,%edi - addl 56(%rsp),%eax - xorl %edx,%ecx roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx psrld $31,%xmm8 - addl %esi,%eax - andl %ecx,%edi - movdqa %xmm10,%xmm9 - xorl %edx,%ecx + xorl %ebp,%esi addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax psrld $30,%xmm10 por %xmm8,%xmm7 - rorl $7,%ebx - xorl %edx,%edi - movl %eax,%esi addl 60(%rsp),%ebp - pslld $2,%xmm9 - pxor %xmm10,%xmm7 - xorl %ecx,%ebx - roll $5,%eax - movdqa -32(%r11),%xmm10 - addl %edi,%ebp - andl %ebx,%esi - pxor %xmm9,%xmm7 - xorl %ecx,%ebx + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp movdqa %xmm7,%xmm9 - rorl $7,%eax + addl 0(%rsp),%edx pxor %xmm4,%xmm0 .byte 102,68,15,58,15,206,8 - xorl %ecx,%esi + xorl %ecx,%ebx movl %ebp,%edi - addl 0(%rsp),%edx - pxor %xmm1,%xmm0 - xorl %ebx,%eax roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx movdqa %xmm10,%xmm8 paddd %xmm7,%xmm10 - addl %esi,%edx - andl %eax,%edi + xorl %ecx,%esi + addl %ebp,%edx pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx xorl %ebx,%eax - addl %ebp,%edx - rorl $7,%ebp - xorl %ebx,%edi movdqa %xmm0,%xmm9 movdqa %xmm10,48(%rsp) movl %edx,%esi - addl 4(%rsp),%ecx - xorl %eax,%ebp roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax pslld $2,%xmm0 - addl %edi,%ecx - andl %ebp,%esi + xorl %ebx,%edi + addl %edx,%ecx psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edx - xorl %eax,%esi movl %ecx,%edi - addl 8(%rsp),%ebx - por %xmm9,%xmm0 - xorl %ebp,%edx roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp movdqa %xmm0,%xmm10 - addl %esi,%ebx - andl %edx,%edi - xorl %ebp,%edx + xorl %eax,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 12(%rsp),%eax - xorl %ebp,%edi + xorl %ebp,%edx movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 16(%rsp),%ebp pxor %xmm5,%xmm1 .byte 102,68,15,58,15,215,8 - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax pxor %xmm2,%xmm1 - addl %esi,%ebp - xorl %ecx,%edi + xorl %ecx,%esi + addl %eax,%ebp movdqa %xmm8,%xmm9 paddd %xmm0,%xmm8 rorl $7,%ebx - addl %eax,%ebp + addl %esi,%ebp pxor %xmm10,%xmm1 addl 20(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm1,%xmm10 movdqa %xmm8,0(%rsp) - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm1 addl 24(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi psrld $30,%xmm10 movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx por %xmm10,%xmm1 addl 28(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movdqa %xmm1,%xmm8 movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 32(%rsp),%eax pxor %xmm6,%xmm2 .byte 102,68,15,58,15,192,8 - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx pxor %xmm3,%xmm2 - addl %esi,%eax - xorl %edx,%edi - movdqa 0(%r11),%xmm10 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 paddd %xmm1,%xmm9 rorl $7,%ecx - addl %ebx,%eax + addl %esi,%eax pxor %xmm8,%xmm2 addl 36(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm8 movdqa %xmm9,16(%rsp) - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp pslld $2,%xmm2 addl 40(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi psrld $30,%xmm8 movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx por %xmm8,%xmm2 addl 44(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movdqa %xmm2,%xmm9 movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 48(%rsp),%ebx pxor %xmm7,%xmm3 .byte 102,68,15,58,15,201,8 - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx pxor %xmm4,%xmm3 - addl %esi,%ebx - xorl %ebp,%edi + xorl %ebp,%esi + addl %ecx,%ebx movdqa %xmm10,%xmm8 paddd %xmm2,%xmm10 rorl $7,%edx - addl %ecx,%ebx + addl %esi,%ebx pxor %xmm9,%xmm3 addl 52(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm9 movdqa %xmm10,32(%rsp) - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax pslld $2,%xmm3 addl 56(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi psrld $30,%xmm9 movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp por %xmm9,%xmm3 addl 60(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movdqa %xmm3,%xmm10 movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 0(%rsp),%ecx pxor %xmm0,%xmm4 .byte 102,68,15,58,15,210,8 - xorl %eax,%esi + xorl %ebx,%esi movl %edx,%edi roll $5,%edx pxor %xmm5,%xmm4 - addl %esi,%ecx - xorl %eax,%edi + xorl %eax,%esi + addl %edx,%ecx movdqa %xmm8,%xmm9 paddd %xmm3,%xmm8 rorl $7,%ebp - addl %edx,%ecx + addl %esi,%ecx pxor %xmm10,%xmm4 addl 4(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm10 movdqa %xmm8,48(%rsp) - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx pslld $2,%xmm4 addl 8(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi psrld $30,%xmm10 movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax por %xmm10,%xmm4 addl 12(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movdqa %xmm4,%xmm8 movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 16(%rsp),%edx pxor %xmm1,%xmm5 .byte 102,68,15,58,15,195,8 - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp pxor %xmm6,%xmm5 - addl %esi,%edx - xorl %ebx,%edi + xorl %ebx,%esi + addl %ebp,%edx movdqa %xmm9,%xmm10 paddd %xmm4,%xmm9 rorl $7,%eax - addl %ebp,%edx + addl %esi,%edx pxor %xmm8,%xmm5 addl 20(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm8 movdqa %xmm9,0(%rsp) - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx pslld $2,%xmm5 addl 24(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi psrld $30,%xmm8 movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx por %xmm8,%xmm5 addl 28(%rsp),%eax + xorl %ebp,%edi movdqa %xmm5,%xmm9 - rorl $7,%ecx movl %ebx,%esi - xorl %edx,%edi roll $5,%ebx - addl %edi,%eax - xorl %ecx,%esi - xorl %edx,%ecx + xorl %edx,%edi addl %ebx,%eax - addl 32(%rsp),%ebp + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi pxor %xmm2,%xmm6 .byte 102,68,15,58,15,204,8 - andl %ecx,%esi xorl %edx,%ecx - rorl $7,%ebx + addl 32(%rsp),%ebp + andl %edx,%edi pxor %xmm7,%xmm6 - movl %eax,%edi - xorl %ecx,%esi + andl %ecx,%esi + rorl $7,%ebx movdqa %xmm10,%xmm8 paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 roll $5,%eax addl %esi,%ebp - pxor %xmm9,%xmm6 - xorl %ebx,%edi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 36(%rsp),%edx movdqa %xmm6,%xmm9 movdqa %xmm10,16(%rsp) - andl %ebx,%edi + movl %ebx,%esi xorl %ecx,%ebx - rorl $7,%eax - movl %ebp,%esi + addl 36(%rsp),%edx + andl %ecx,%esi pslld $2,%xmm6 - xorl %ebx,%edi + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi roll $5,%ebp - psrld $30,%xmm9 addl %edi,%edx - xorl %eax,%esi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 40(%rsp),%ecx - andl %eax,%esi por %xmm9,%xmm6 + movl %eax,%edi xorl %ebx,%eax - rorl $7,%ebp movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx movl %edx,%edi - xorl %eax,%esi roll $5,%edx addl %esi,%ecx - xorl %ebp,%edi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp addl 44(%rsp),%ebx + andl %eax,%esi andl %ebp,%edi - xorl %eax,%ebp rorl $7,%edx + addl %esi,%ebx movl %ecx,%esi - xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %edx,%esi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 48(%rsp),%eax + movl %edx,%edi pxor %xmm3,%xmm7 .byte 102,68,15,58,15,213,8 - andl %edx,%esi xorl %ebp,%edx - rorl $7,%ecx + addl 48(%rsp),%eax + andl %ebp,%edi pxor %xmm0,%xmm7 - movl %ebx,%edi - xorl %edx,%esi - movdqa 32(%r11),%xmm9 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 roll $5,%ebx addl %esi,%eax - pxor %xmm10,%xmm7 - xorl %ecx,%edi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax - addl 52(%rsp),%ebp movdqa %xmm7,%xmm10 movdqa %xmm8,32(%rsp) - andl %ecx,%edi + movl %ecx,%esi xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp movl %eax,%esi - pslld $2,%xmm7 - xorl %ecx,%edi roll $5,%eax - psrld $30,%xmm10 addl %edi,%ebp - xorl %ebx,%esi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 56(%rsp),%edx - andl %ebx,%esi por %xmm10,%xmm7 + movl %ebx,%edi xorl %ecx,%ebx - rorl $7,%eax movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx movl %ebp,%edi - xorl %ebx,%esi roll $5,%ebp addl %esi,%edx - xorl %eax,%edi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax addl 60(%rsp),%ecx + andl %ebx,%esi andl %eax,%edi - xorl %ebx,%eax rorl $7,%ebp + addl %esi,%ecx movl %edx,%esi - xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebp,%esi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 0(%rsp),%ebx + movl %ebp,%edi pxor %xmm4,%xmm0 .byte 102,68,15,58,15,198,8 - andl %ebp,%esi xorl %eax,%ebp - rorl $7,%edx + addl 0(%rsp),%ebx + andl %eax,%edi pxor %xmm1,%xmm0 - movl %ecx,%edi - xorl %ebp,%esi + andl %ebp,%esi + rorl $7,%edx movdqa %xmm9,%xmm10 paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 roll $5,%ecx addl %esi,%ebx - pxor %xmm8,%xmm0 - xorl %edx,%edi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 4(%rsp),%eax movdqa %xmm0,%xmm8 movdqa %xmm9,48(%rsp) - andl %edx,%edi + movl %edx,%esi xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax movl %ebx,%esi - pslld $2,%xmm0 - xorl %edx,%edi roll $5,%ebx - psrld $30,%xmm8 addl %edi,%eax - xorl %ecx,%esi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax - addl 8(%rsp),%ebp - andl %ecx,%esi por %xmm8,%xmm0 + movl %ecx,%edi xorl %edx,%ecx - rorl $7,%ebx movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp movl %eax,%edi - xorl %ecx,%esi roll $5,%eax addl %esi,%ebp - xorl %ebx,%edi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx addl 12(%rsp),%edx + andl %ecx,%esi andl %ebx,%edi - xorl %ecx,%ebx rorl $7,%eax + addl %esi,%edx movl %ebp,%esi - xorl %ebx,%edi roll $5,%ebp addl %edi,%edx - xorl %eax,%esi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 16(%rsp),%ecx + movl %eax,%edi pxor %xmm5,%xmm1 .byte 102,68,15,58,15,207,8 - andl %eax,%esi xorl %ebx,%eax - rorl $7,%ebp + addl 16(%rsp),%ecx + andl %ebx,%edi pxor %xmm2,%xmm1 - movl %edx,%edi - xorl %eax,%esi + andl %eax,%esi + rorl $7,%ebp movdqa %xmm10,%xmm8 paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 roll $5,%edx addl %esi,%ecx - pxor %xmm9,%xmm1 - xorl %ebp,%edi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 20(%rsp),%ebx movdqa %xmm1,%xmm9 movdqa %xmm10,0(%rsp) - andl %ebp,%edi + movl %ebp,%esi xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx movl %ecx,%esi - pslld $2,%xmm1 - xorl %ebp,%edi roll $5,%ecx - psrld $30,%xmm9 addl %edi,%ebx - xorl %edx,%esi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 24(%rsp),%eax - andl %edx,%esi por %xmm9,%xmm1 + movl %edx,%edi xorl %ebp,%edx - rorl $7,%ecx movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax movl %ebx,%edi - xorl %edx,%esi roll $5,%ebx addl %esi,%eax - xorl %ecx,%edi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx addl 28(%rsp),%ebp + andl %edx,%esi andl %ecx,%edi - xorl %edx,%ecx rorl $7,%ebx + addl %esi,%ebp movl %eax,%esi - xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %ebx,%esi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 32(%rsp),%edx + movl %ebx,%edi pxor %xmm6,%xmm2 .byte 102,68,15,58,15,208,8 - andl %ebx,%esi xorl %ecx,%ebx - rorl $7,%eax + addl 32(%rsp),%edx + andl %ecx,%edi pxor %xmm3,%xmm2 - movl %ebp,%edi - xorl %ebx,%esi + andl %ebx,%esi + rorl $7,%eax movdqa %xmm8,%xmm9 paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 roll $5,%ebp addl %esi,%edx - pxor %xmm10,%xmm2 - xorl %eax,%edi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 36(%rsp),%ecx movdqa %xmm2,%xmm10 movdqa %xmm8,16(%rsp) - andl %eax,%edi + movl %eax,%esi xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx movl %edx,%esi - pslld $2,%xmm2 - xorl %eax,%edi roll $5,%edx - psrld $30,%xmm10 addl %edi,%ecx - xorl %ebp,%esi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 40(%rsp),%ebx - andl %ebp,%esi por %xmm10,%xmm2 + movl %ebp,%edi xorl %eax,%ebp - rorl $7,%edx movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx movl %ecx,%edi - xorl %ebp,%esi roll $5,%ecx addl %esi,%ebx - xorl %edx,%edi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx addl 44(%rsp),%eax + andl %ebp,%esi andl %edx,%edi - xorl %ebp,%edx rorl $7,%ecx + addl %esi,%eax movl %ebx,%esi - xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %edx,%esi + xorl %ebp,%edx addl %ebx,%eax addl 48(%rsp),%ebp pxor %xmm7,%xmm3 .byte 102,68,15,58,15,193,8 - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax pxor %xmm4,%xmm3 - addl %esi,%ebp - xorl %ecx,%edi + xorl %ecx,%esi + addl %eax,%ebp movdqa %xmm9,%xmm10 paddd %xmm2,%xmm9 rorl $7,%ebx - addl %eax,%ebp + addl %esi,%ebp pxor %xmm8,%xmm3 addl 52(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm3,%xmm8 - movdqa %xmm9,32(%rsp) - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm3 addl 56(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi psrld $30,%xmm8 movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx por %xmm8,%xmm3 addl 60(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 0(%rsp),%eax paddd %xmm3,%xmm10 - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax + xorl %edx,%esi movdqa %xmm10,48(%rsp) - xorl %edx,%edi - rorl $7,%ecx addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 4(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 8(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx addl 12(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx cmpq %r10,%r9 je .Ldone_ssse3 movdqa 64(%r11),%xmm6 - movdqa -64(%r11),%xmm9 + movdqa 0(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -2260,112 +2278,113 @@ _ssse3_shortcut: .byte 102,15,56,0,198 addq $64,%r9 addl 16(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi .byte 102,15,56,0,206 movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx paddd %xmm9,%xmm0 - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx - addl 20(%rsp),%eax + rorl $7,%edx + addl %esi,%ebx movdqa %xmm0,0(%rsp) - xorl %edx,%edi - movl %ebx,%esi + addl 20(%rsp),%eax + xorl %ebp,%edi psubd %xmm9,%xmm0 + movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 24(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp addl 28(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 32(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi .byte 102,15,56,0,214 movl %edx,%edi roll $5,%edx - addl %esi,%ecx paddd %xmm9,%xmm1 - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx - addl 36(%rsp),%ebx + rorl $7,%ebp + addl %esi,%ecx movdqa %xmm1,16(%rsp) - xorl %ebp,%edi - movl %ecx,%esi + addl 36(%rsp),%ebx + xorl %eax,%edi psubd %xmm9,%xmm1 + movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 40(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 44(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 48(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi .byte 102,15,56,0,222 movl %ebp,%edi roll $5,%ebp - addl %esi,%edx paddd %xmm9,%xmm2 - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx - addl 52(%rsp),%ecx + rorl $7,%eax + addl %esi,%edx movdqa %xmm2,32(%rsp) - xorl %eax,%edi - movl %edx,%esi + addl 52(%rsp),%ecx + xorl %ebx,%edi psubd %xmm9,%xmm2 + movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 56(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 60(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2375,110 +2394,108 @@ _ssse3_shortcut: movl %esi,4(%r8) movl %esi,%ebx movl %ecx,8(%r8) - movl %ecx,%edi movl %edx,12(%r8) - xorl %edx,%edi movl %ebp,16(%r8) - andl %edi,%esi jmp .Loop_ssse3 .align 16 .Ldone_ssse3: addl 16(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 20(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 24(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp addl 28(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 32(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx addl 36(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 40(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 44(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 48(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx addl 52(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 56(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 60(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2499,16 +2516,11 @@ _ssse3_shortcut: .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 .align 64 K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 diff --git a/lib/accelerated/x86/elf/sha256-ssse3-x86.s b/lib/accelerated/x86/elf/sha256-ssse3-x86.s index 212962ae31..aab48fa387 100644 --- a/lib/accelerated/x86/elf/sha256-ssse3-x86.s +++ b/lib/accelerated/x86/elf/sha256-ssse3-x86.s @@ -64,405 +64,195 @@ sha256_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - leal _gnutls_x86_cpuid_s-.L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz .L002loop - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je .L003loop_shrd - subl %edi,%eax - cmpl $256,%eax - jae .L004unrolled - jmp .L002loop .align 16 .L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx - bswap %eax movl 12(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx - bswap %eax movl 28(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx - bswap %eax movl 44(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx - bswap %eax movl 60(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx addl $64,%edi - leal -36(%esp),%esp - movl %edi,104(%esp) + subl $32,%esp + movl %edi,100(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi - movl %ebx,8(%esp) - xorl %ecx,%ebx - movl %ecx,12(%esp) - movl %edi,16(%esp) - movl %ebx,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edi,12(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - movl %edi,32(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edi,28(%esp) .align 16 -.L00500_15: +.L00300_15: + movl 92(%esp),%ebx movl %edx,%ecx - movl 24(%esp),%esi rorl $14,%ecx - movl 28(%esp),%edi + movl 20(%esp),%esi xorl %edx,%ecx - xorl %edi,%esi - movl 96(%esp),%ebx rorl $5,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx xorl %edi,%esi - rorl $6,%edx + movl %edx,16(%esp) movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi addl %esi,%ebx rorl $9,%ecx - addl %edx,%ebx - movl 8(%esp),%edi + addl 28(%esp),%ebx xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp rorl $11,%ecx - movl (%ebp),%esi + movl 4(%esp),%esi xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax rorl $2,%ecx - addl %esi,%ebx - movl %eax,(%esp) addl %ebx,%edx - andl 4(%esp),%eax + movl 8(%esp),%edi addl %ecx,%ebx - xorl %edi,%eax + movl %eax,(%esp) + movl %eax,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax + movl (%ebp),%esi + orl %ecx,%eax addl $4,%ebp addl %ebx,%eax + addl %esi,%edx + addl %esi,%eax cmpl $3248222580,%esi - jne .L00500_15 - movl 156(%esp),%ecx - jmp .L00616_63 + jne .L00300_15 + movl 152(%esp),%ebx .align 16 -.L00616_63: - movl %ecx,%ebx - movl 104(%esp),%esi - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx +.L00416_63: + movl %ebx,%esi + movl 100(%esp),%ecx + rorl $11,%esi + movl %ecx,%edi + xorl %ebx,%esi + rorl $7,%esi shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 160(%esp),%ebx - shrl $10,%edi - addl 124(%esp),%ebx + rorl $2,%edi + xorl %esi,%ebx + xorl %ecx,%edi + rorl $17,%edi + shrl $10,%ecx + addl 156(%esp),%ebx + xorl %ecx,%edi + addl 120(%esp),%ebx movl %edx,%ecx - xorl %esi,%edi - movl 24(%esp),%esi - rorl $14,%ecx addl %edi,%ebx - movl 28(%esp),%edi + rorl $14,%ecx + movl 20(%esp),%esi xorl %edx,%ecx - xorl %edi,%esi - movl %ebx,96(%esp) rorl $5,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx + movl %ebx,92(%esp) + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx xorl %edi,%esi - rorl $6,%edx + movl %edx,16(%esp) movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi addl %esi,%ebx rorl $9,%ecx - addl %edx,%ebx - movl 8(%esp),%edi + addl 28(%esp),%ebx xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp rorl $11,%ecx - movl (%ebp),%esi + movl 4(%esp),%esi xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax rorl $2,%ecx - addl %esi,%ebx - movl %eax,(%esp) addl %ebx,%edx - andl 4(%esp),%eax - addl %ecx,%ebx - xorl %edi,%eax - movl 156(%esp),%ecx - addl $4,%ebp - addl %ebx,%eax - cmpl $3329325298,%esi - jne .L00616_63 - movl 356(%esp),%esi - movl 8(%esp),%ebx - movl 16(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl 24(%esp),%eax - movl 28(%esp),%ebx - movl 32(%esp),%ecx - movl 360(%esp),%edi - addl 16(%esi),%edx - addl 20(%esi),%eax - addl 24(%esi),%ebx - addl 28(%esi),%ecx - movl %edx,16(%esi) - movl %eax,20(%esi) - movl %ebx,24(%esi) - movl %ecx,28(%esi) - leal 356(%esp),%esp - subl $256,%ebp - cmpl 8(%esp),%edi - jb .L002loop - movl 12(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 32 -.L003loop_shrd: - movl (%edi),%eax - movl 4(%edi),%ebx - movl 8(%edi),%ecx - bswap %eax - movl 12(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 16(%edi),%eax - movl 20(%edi),%ebx - movl 24(%edi),%ecx - bswap %eax - movl 28(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 32(%edi),%eax - movl 36(%edi),%ebx - movl 40(%edi),%ecx - bswap %eax - movl 44(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 48(%edi),%eax - movl 52(%edi),%ebx - movl 56(%edi),%ecx - bswap %eax - movl 60(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - addl $64,%edi - leal -36(%esp),%esp - movl %edi,104(%esp) - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,8(%esp) - xorl %ecx,%ebx - movl %ecx,12(%esp) - movl %edi,16(%esp) - movl %ebx,(%esp) - movl 16(%esi),%edx - movl 20(%esi),%ebx - movl 24(%esi),%ecx - movl 28(%esi),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - movl %edi,32(%esp) -.align 16 -.L00700_15_shrd: - movl %edx,%ecx - movl 24(%esp),%esi - shrdl $14,%ecx,%ecx - movl 28(%esp),%edi - xorl %edx,%ecx - xorl %edi,%esi - movl 96(%esp),%ebx - shrdl $5,%ecx,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx - xorl %edi,%esi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %esi,%ebx - shrdl $9,%ecx,%ecx - addl %edx,%ebx movl 8(%esp),%edi - xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp - shrdl $11,%ecx,%ecx - movl (%ebp),%esi - xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %esi,%ebx - movl %eax,(%esp) - addl %ebx,%edx - andl 4(%esp),%eax addl %ecx,%ebx - xorl %edi,%eax - addl $4,%ebp - addl %ebx,%eax - cmpl $3248222580,%esi - jne .L00700_15_shrd - movl 156(%esp),%ecx - jmp .L00816_63_shrd -.align 16 -.L00816_63_shrd: - movl %ecx,%ebx - movl 104(%esp),%esi - shrdl $11,%ecx,%ecx - movl %esi,%edi - shrdl $2,%esi,%esi - xorl %ebx,%ecx - shrl $3,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - shrdl $17,%esi,%esi - addl 160(%esp),%ebx - shrl $10,%edi - addl 124(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 24(%esp),%esi - shrdl $14,%ecx,%ecx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %edx,%ecx - xorl %edi,%esi - movl %ebx,96(%esp) - shrdl $5,%ecx,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx - xorl %edi,%esi - shrdl $6,%edx,%edx + movl %eax,(%esp) movl %eax,%ecx - addl %esi,%ebx - shrdl $9,%ecx,%ecx - addl %edx,%ebx - movl 8(%esp),%edi - xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp - shrdl $11,%ecx,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax movl (%ebp),%esi - xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %esi,%ebx - movl %eax,(%esp) - addl %ebx,%edx - andl 4(%esp),%eax - addl %ecx,%ebx - xorl %edi,%eax - movl 156(%esp),%ecx + orl %ecx,%eax addl $4,%ebp addl %ebx,%eax + movl 152(%esp),%ebx + addl %esi,%edx + addl %esi,%eax cmpl $3329325298,%esi - jne .L00816_63_shrd - movl 356(%esp),%esi - movl 8(%esp),%ebx - movl 16(%esp),%ecx + jne .L00416_63 + movl 352(%esp),%esi + movl 4(%esp),%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edi addl (%esi),%eax addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx + addl 8(%esi),%ecx + addl 12(%esi),%edi movl %eax,(%esi) movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl 24(%esp),%eax - movl 28(%esp),%ebx - movl 32(%esp),%ecx - movl 360(%esp),%edi + movl %ecx,8(%esi) + movl %edi,12(%esi) + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 356(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx @@ -471,10 +261,10 @@ sha256_block_data_order: movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) - leal 356(%esp),%esp + addl $352,%esp subl $256,%ebp cmpl 8(%esp),%edi - jb .L003loop_shrd + jb .L002loop movl 12(%esp),%esp popl %edi popl %esi @@ -483,2921 +273,27 @@ sha256_block_data_order: ret .align 64 .L001K256: -.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 -.long 66051,67438087,134810123,202182159 +.long 1116352408,1899447441,3049323471,3921009573 +.long 961987163,1508970993,2453635748,2870763221 +.long 3624381080,310598401,607225278,1426881987 +.long 1925078388,2162078206,2614888103,3248222580 +.long 3835390401,4022224774,264347078,604807628 +.long 770255983,1249150122,1555081692,1996064986 +.long 2554220882,2821834349,2952996808,3210313671 +.long 3336571891,3584528711,113926993,338241895 +.long 666307205,773529912,1294757372,1396182291 +.long 1695183700,1986661051,2177026350,2456956037 +.long 2730485921,2820302411,3259730800,3345764771 +.long 3516065817,3600352804,4094571909,275423344 +.long 430227734,506948616,659060556,883997877 +.long 958139571,1322822218,1537002063,1747873779 +.long 1955562222,2024104815,2227730452,2361852424 +.long 2428436474,2756734187,3204031479,3329325298 +.size sha256_block_data_order,.-.L_sha256_block_data_order_begin .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.align 16 -.L004unrolled: - leal -96(%esp),%esp - movl (%esi),%eax - movl 4(%esi),%ebp - movl 8(%esi),%ecx - movl 12(%esi),%ebx - movl %ebp,4(%esp) - xorl %ecx,%ebp - movl %ecx,8(%esp) - movl %ebx,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%ebx - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %ebx,20(%esp) - movl %ecx,24(%esp) - movl %esi,28(%esp) - jmp .L009grand_loop -.align 16 -.L009grand_loop: - movl (%edi),%ebx - movl 4(%edi),%ecx - bswap %ebx - movl 8(%edi),%esi - bswap %ecx - movl %ebx,32(%esp) - bswap %esi - movl %ecx,36(%esp) - movl %esi,40(%esp) - movl 12(%edi),%ebx - movl 16(%edi),%ecx - bswap %ebx - movl 20(%edi),%esi - bswap %ecx - movl %ebx,44(%esp) - bswap %esi - movl %ecx,48(%esp) - movl %esi,52(%esp) - movl 24(%edi),%ebx - movl 28(%edi),%ecx - bswap %ebx - movl 32(%edi),%esi - bswap %ecx - movl %ebx,56(%esp) - bswap %esi - movl %ecx,60(%esp) - movl %esi,64(%esp) - movl 36(%edi),%ebx - movl 40(%edi),%ecx - bswap %ebx - movl 44(%edi),%esi - bswap %ecx - movl %ebx,68(%esp) - bswap %esi - movl %ecx,72(%esp) - movl %esi,76(%esp) - movl 48(%edi),%ebx - movl 52(%edi),%ecx - bswap %ebx - movl 56(%edi),%esi - bswap %ecx - movl %ebx,80(%esp) - bswap %esi - movl %ecx,84(%esp) - movl %esi,88(%esp) - movl 60(%edi),%ebx - addl $64,%edi - bswap %ebx - movl %edi,100(%esp) - movl %ebx,92(%esp) - movl %edx,%ecx - movl 20(%esp),%esi - rorl $14,%edx - movl 24(%esp),%edi - xorl %ecx,%edx - movl 32(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1116352408(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 16(%esp),%ecx - rorl $14,%edx - movl 20(%esp),%edi - xorl %esi,%edx - movl 36(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1899447441(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 12(%esp),%esi - rorl $14,%edx - movl 16(%esp),%edi - xorl %ecx,%edx - movl 40(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3049323471(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 8(%esp),%ecx - rorl $14,%edx - movl 12(%esp),%edi - xorl %esi,%edx - movl 44(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3921009573(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 4(%esp),%esi - rorl $14,%edx - movl 8(%esp),%edi - xorl %ecx,%edx - movl 48(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 961987163(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl (%esp),%ecx - rorl $14,%edx - movl 4(%esp),%edi - xorl %esi,%edx - movl 52(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1508970993(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 28(%esp),%esi - rorl $14,%edx - movl (%esp),%edi - xorl %ecx,%edx - movl 56(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2453635748(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 24(%esp),%ecx - rorl $14,%edx - movl 28(%esp),%edi - xorl %esi,%edx - movl 60(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2870763221(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 20(%esp),%esi - rorl $14,%edx - movl 24(%esp),%edi - xorl %ecx,%edx - movl 64(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3624381080(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 16(%esp),%ecx - rorl $14,%edx - movl 20(%esp),%edi - xorl %esi,%edx - movl 68(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 310598401(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 12(%esp),%esi - rorl $14,%edx - movl 16(%esp),%edi - xorl %ecx,%edx - movl 72(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 607225278(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 8(%esp),%ecx - rorl $14,%edx - movl 12(%esp),%edi - xorl %esi,%edx - movl 76(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1426881987(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 4(%esp),%esi - rorl $14,%edx - movl 8(%esp),%edi - xorl %ecx,%edx - movl 80(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1925078388(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl (%esp),%ecx - rorl $14,%edx - movl 4(%esp),%edi - xorl %esi,%edx - movl 84(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2162078206(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 28(%esp),%esi - rorl $14,%edx - movl (%esp),%edi - xorl %ecx,%edx - movl 88(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2614888103(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 24(%esp),%ecx - rorl $14,%edx - movl 28(%esp),%edi - xorl %esi,%edx - movl 92(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3248222580(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3835390401(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 4022224774(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 264347078(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 604807628(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 770255983(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1249150122(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1555081692(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1996064986(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2554220882(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2821834349(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2952996808(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3210313671(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3336571891(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3584528711(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,88(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 113926993(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,92(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 338241895(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 666307205(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 773529912(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1294757372(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1396182291(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1695183700(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1986661051(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2177026350(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2456956037(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2730485921(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2820302411(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3259730800(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3345764771(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3516065817(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3600352804(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,88(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 4094571909(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,92(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 275423344(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 430227734(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 506948616(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 659060556(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 883997877(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 958139571(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1322822218(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1537002063(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1747873779(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1955562222(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2024104815(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2227730452(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2361852424(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2428436474(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2756734187(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3204031479(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3329325298(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 96(%esp),%esi - xorl %edi,%ebp - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebp - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebp,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebp,4(%esp) - xorl %edi,%ebp - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ebx - movl 28(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ebx - addl 28(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %ebx,24(%esi) - movl %ecx,28(%esi) - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - cmpl 104(%esp),%edi - jb .L009grand_loop - movl 108(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size sha256_block_data_order,.-.L_sha256_block_data_order_begin -.comm _gnutls_x86_cpuid_s,16,4 .section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86.s b/lib/accelerated/x86/elf/sha512-ssse3-x86.s index 7fa849a267..e8eeefe7ad 100644 --- a/lib/accelerated/x86/elf/sha512-ssse3-x86.s +++ b/lib/accelerated/x86/elf/sha512-ssse3-x86.s @@ -594,8 +594,6 @@ sha512_block_data_order: .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 -.long 67438087,66051 -.long 202182159,134810123 .size sha512_block_data_order,.-.L_sha512_block_data_order_begin .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s index 7808a1b29d..22f55fe953 100644 --- a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s +++ b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s @@ -39,17 +39,10 @@ # .text - .globl sha256_block_data_order .type sha256_block_data_order,@function .align 16 sha256_block_data_order: - leaq _gnutls_x86_cpuid_s(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - testl $512,%r10d - jnz .Lssse3_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -67,6 +60,8 @@ sha256_block_data_order: movq %r11,64+24(%rsp) .Lprologue: + leaq K256(%rip),%rbp + movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx @@ -79,1632 +74,1694 @@ sha256_block_data_order: .align 16 .Lloop: - movl %ebx,%edi - leaq K256(%rip),%rbp - xorl %ecx,%edi + xorq %rdi,%rdi movl 0(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r11d + movl 4(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,4(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r10d + movl 8(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r9d + movl 12(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,12(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r8d + movl 16(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%edx + movl 20(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,20(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ecx + movl 24(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ebx + movl 28(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,28(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%eax + movl 32(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r11d + movl 36(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,36(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r10d + movl 40(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r9d + movl 44(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,44(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r8d + movl 48(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%edx + movl 52(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,52(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ecx + movl 56(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ebx + movl 60(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,60(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r15d - + movl 56(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%eax - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 36(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 0(%rsp),%r12d movl %r8d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d - leaq 4(%rbp),%rbp movl 8(%rsp),%r13d - movl 60(%rsp),%edi - + movl 60(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r11d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 40(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 4(%rsp),%r12d movl %edx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,4(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d - leaq 4(%rbp),%rbp movl 12(%rsp),%r13d - movl 0(%rsp),%r15d - + movl 0(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r10d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 44(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 8(%rsp),%r12d movl %ecx,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d - leaq 4(%rbp),%rbp movl 16(%rsp),%r13d - movl 4(%rsp),%edi - + movl 4(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r9d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 48(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 12(%rsp),%r12d movl %ebx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,12(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d - leaq 20(%rbp),%rbp movl 20(%rsp),%r13d - movl 8(%rsp),%r15d - + movl 8(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r8d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 52(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 16(%rsp),%r12d movl %eax,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx - leaq 4(%rbp),%rbp movl 24(%rsp),%r13d - movl 12(%rsp),%edi - + movl 12(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%edx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 56(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 20(%rsp),%r12d movl %r11d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,20(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx - leaq 4(%rbp),%rbp movl 28(%rsp),%r13d - movl 16(%rsp),%r15d - + movl 16(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ecx - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%r15d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 60(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 24(%rsp),%r12d movl %r10d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx - leaq 4(%rbp),%rbp movl 32(%rsp),%r13d - movl 20(%rsp),%edi - + movl 20(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ebx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 0(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 28(%rsp),%r12d movl %r9d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,28(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp movl 36(%rsp),%r13d - movl 24(%rsp),%r15d - + movl 24(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%eax - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 4(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 32(%rsp),%r12d movl %r8d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d - leaq 4(%rbp),%rbp movl 40(%rsp),%r13d - movl 28(%rsp),%edi - + movl 28(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r11d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 8(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 36(%rsp),%r12d movl %edx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,36(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d - leaq 4(%rbp),%rbp movl 44(%rsp),%r13d - movl 32(%rsp),%r15d - + movl 32(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r10d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 12(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 40(%rsp),%r12d movl %ecx,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d - leaq 4(%rbp),%rbp movl 48(%rsp),%r13d - movl 36(%rsp),%edi - + movl 36(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r9d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 16(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 44(%rsp),%r12d movl %ebx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,44(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d - leaq 20(%rbp),%rbp movl 52(%rsp),%r13d - movl 40(%rsp),%r15d - + movl 40(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r8d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 20(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 48(%rsp),%r12d movl %eax,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx - leaq 4(%rbp),%rbp movl 56(%rsp),%r13d - movl 44(%rsp),%edi - + movl 44(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%edx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 24(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 52(%rsp),%r12d movl %r11d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,52(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx - leaq 4(%rbp),%rbp movl 60(%rsp),%r13d - movl 48(%rsp),%r15d - + movl 48(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ecx - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 28(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 56(%rsp),%r12d movl %r10d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx - leaq 4(%rbp),%rbp movl 0(%rsp),%r13d - movl 52(%rsp),%edi - + movl 52(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ebx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 32(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 60(%rsp),%r12d movl %r9d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,60(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp - cmpb $0,3(%rbp) - jnz .Lrounds_16_xx + cmpq $64,%rdi + jb .Lrounds_16_xx movq 64+0(%rsp),%rdi - addl %r14d,%eax leaq 64(%rsi),%rsi addl 0(%rdi),%eax @@ -1743,1139 +1800,20 @@ sha256_block_data_order: .type K256,@object K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.type sha256_block_data_order_ssse3,@function -.align 64 -sha256_block_data_order_ssse3: -.Lssse3_shortcut: - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - movq %rsp,%r11 - shlq $4,%rdx - subq $96,%rsp - leaq (%rsi,%rdx,4),%rdx - andq $-64,%rsp - movq %rdi,64+0(%rsp) - movq %rsi,64+8(%rsp) - movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) -.Lprologue_ssse3: - - movl 0(%rdi),%eax - movl 4(%rdi),%ebx - movl 8(%rdi),%ecx - movl 12(%rdi),%edx - movl 16(%rdi),%r8d - movl 20(%rdi),%r9d - movl 24(%rdi),%r10d - movl 28(%rdi),%r11d - - - jmp .Lloop_ssse3 -.align 16 -.Lloop_ssse3: - movdqa K256+512(%rip),%xmm7 - movdqu 0(%rsi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqu 32(%rsi),%xmm2 - movdqu 48(%rsi),%xmm3 -.byte 102,15,56,0,199 - leaq K256(%rip),%rbp -.byte 102,15,56,0,207 - movdqa 0(%rbp),%xmm4 -.byte 102,15,56,0,215 - movdqa 32(%rbp),%xmm5 - paddd %xmm0,%xmm4 - movdqa 64(%rbp),%xmm6 -.byte 102,15,56,0,223 - movdqa 96(%rbp),%xmm7 - paddd %xmm1,%xmm5 - paddd %xmm2,%xmm6 - paddd %xmm3,%xmm7 - movdqa %xmm4,0(%rsp) - movl %eax,%r14d - movdqa %xmm5,16(%rsp) - movl %ebx,%edi - movdqa %xmm6,32(%rsp) - xorl %ecx,%edi - movdqa %xmm7,48(%rsp) - movl %r8d,%r13d - jmp .Lssse3_00_47 - -.align 16 -.Lssse3_00_47: - subq $-32*4,%rbp - rorl $14,%r13d - movdqa %xmm1,%xmm4 - movl %r14d,%eax - movl %r9d,%r12d - movdqa %xmm3,%xmm7 - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,224,4 - andl %r8d,%r12d - xorl %r8d,%r13d -.byte 102,15,58,15,250,4 - addl 0(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - addl %r12d,%r11d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm0 - rorl $2,%r14d - addl %r11d,%edx - psrld $7,%xmm6 - addl %edi,%r11d - movl %edx,%r13d - pshufd $250,%xmm3,%xmm7 - addl %r11d,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%r11d - movl %r8d,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 4(%rsp),%r10d - movl %r11d,%edi - pxor %xmm6,%xmm4 - xorl %r9d,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - addl %r12d,%r10d - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm0 - rorl $2,%r14d - addl %r10d,%ecx - psrlq $17,%xmm6 - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %ecx,%r13d - xorl %r8d,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - psrldq $8,%xmm7 - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - paddd %xmm7,%xmm0 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - pshufd $80,%xmm0,%xmm7 - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - movdqa %xmm7,%xmm6 - addl %edi,%r9d - movl %ebx,%r13d - psrld $10,%xmm7 - addl %r9d,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - psrlq $2,%xmm6 - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - pxor %xmm6,%xmm7 - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %r10d,%edi - addl %r12d,%r8d - movdqa 0(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - paddd %xmm7,%xmm0 - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - paddd %xmm0,%xmm6 - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,0(%rsp) - rorl $14,%r13d - movdqa %xmm2,%xmm4 - movl %r14d,%r8d - movl %ebx,%r12d - movdqa %xmm0,%xmm7 - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,225,4 - andl %eax,%r12d - xorl %eax,%r13d -.byte 102,15,58,15,251,4 - addl 16(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - addl %r12d,%edx - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm1 - rorl $2,%r14d - addl %edx,%r11d - psrld $7,%xmm6 - addl %edi,%edx - movl %r11d,%r13d - pshufd $250,%xmm0,%xmm7 - addl %edx,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%edx - movl %eax,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 20(%rsp),%ecx - movl %edx,%edi - pxor %xmm6,%xmm4 - xorl %ebx,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - addl %r12d,%ecx - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm1 - rorl $2,%r14d - addl %ecx,%r10d - psrlq $17,%xmm6 - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r10d,%r13d - xorl %eax,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - psrldq $8,%xmm7 - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - paddd %xmm7,%xmm1 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - pshufd $80,%xmm1,%xmm7 - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - movdqa %xmm7,%xmm6 - addl %edi,%ebx - movl %r9d,%r13d - psrld $10,%xmm7 - addl %ebx,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - psrlq $2,%xmm6 - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - pxor %xmm6,%xmm7 - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %ecx,%edi - addl %r12d,%eax - movdqa 32(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - paddd %xmm7,%xmm1 - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - paddd %xmm1,%xmm6 - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,16(%rsp) - rorl $14,%r13d - movdqa %xmm3,%xmm4 - movl %r14d,%eax - movl %r9d,%r12d - movdqa %xmm1,%xmm7 - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,226,4 - andl %r8d,%r12d - xorl %r8d,%r13d -.byte 102,15,58,15,248,4 - addl 32(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - addl %r12d,%r11d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm2 - rorl $2,%r14d - addl %r11d,%edx - psrld $7,%xmm6 - addl %edi,%r11d - movl %edx,%r13d - pshufd $250,%xmm1,%xmm7 - addl %r11d,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%r11d - movl %r8d,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 36(%rsp),%r10d - movl %r11d,%edi - pxor %xmm6,%xmm4 - xorl %r9d,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - addl %r12d,%r10d - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm2 - rorl $2,%r14d - addl %r10d,%ecx - psrlq $17,%xmm6 - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %ecx,%r13d - xorl %r8d,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - psrldq $8,%xmm7 - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - paddd %xmm7,%xmm2 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - pshufd $80,%xmm2,%xmm7 - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - movdqa %xmm7,%xmm6 - addl %edi,%r9d - movl %ebx,%r13d - psrld $10,%xmm7 - addl %r9d,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - psrlq $2,%xmm6 - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - pxor %xmm6,%xmm7 - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %r10d,%edi - addl %r12d,%r8d - movdqa 64(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - paddd %xmm7,%xmm2 - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - paddd %xmm2,%xmm6 - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,32(%rsp) - rorl $14,%r13d - movdqa %xmm0,%xmm4 - movl %r14d,%r8d - movl %ebx,%r12d - movdqa %xmm2,%xmm7 - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,227,4 - andl %eax,%r12d - xorl %eax,%r13d -.byte 102,15,58,15,249,4 - addl 48(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - addl %r12d,%edx - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm3 - rorl $2,%r14d - addl %edx,%r11d - psrld $7,%xmm6 - addl %edi,%edx - movl %r11d,%r13d - pshufd $250,%xmm2,%xmm7 - addl %edx,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%edx - movl %eax,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 52(%rsp),%ecx - movl %edx,%edi - pxor %xmm6,%xmm4 - xorl %ebx,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - addl %r12d,%ecx - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm3 - rorl $2,%r14d - addl %ecx,%r10d - psrlq $17,%xmm6 - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r10d,%r13d - xorl %eax,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - psrldq $8,%xmm7 - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - paddd %xmm7,%xmm3 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - pshufd $80,%xmm3,%xmm7 - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - movdqa %xmm7,%xmm6 - addl %edi,%ebx - movl %r9d,%r13d - psrld $10,%xmm7 - addl %ebx,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - psrlq $2,%xmm6 - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - pxor %xmm6,%xmm7 - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %ecx,%edi - addl %r12d,%eax - movdqa 96(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - paddd %xmm7,%xmm3 - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - paddd %xmm3,%xmm6 - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,48(%rsp) - cmpb $0,131(%rbp) - jne .Lssse3_00_47 - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 0(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - xorl %ebx,%r15d - addl %r12d,%r11d - rorl $6,%r13d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - rorl $2,%r14d - addl %r11d,%edx - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 4(%rsp),%r10d - movl %r11d,%edi - xorl %r9d,%r12d - rorl $11,%r14d - xorl %eax,%edi - addl %r12d,%r10d - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - rorl $2,%r14d - addl %r10d,%ecx - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - xorl %r10d,%edi - addl %r12d,%r8d - rorl $6,%r13d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 16(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - xorl %r9d,%r15d - addl %r12d,%edx - rorl $6,%r13d - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - rorl $2,%r14d - addl %edx,%r11d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 20(%rsp),%ecx - movl %edx,%edi - xorl %ebx,%r12d - rorl $11,%r14d - xorl %r8d,%edi - addl %r12d,%ecx - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - rorl $2,%r14d - addl %ecx,%r10d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - xorl %ecx,%edi - addl %r12d,%eax - rorl $6,%r13d - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 32(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - xorl %ebx,%r15d - addl %r12d,%r11d - rorl $6,%r13d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - rorl $2,%r14d - addl %r11d,%edx - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 36(%rsp),%r10d - movl %r11d,%edi - xorl %r9d,%r12d - rorl $11,%r14d - xorl %eax,%edi - addl %r12d,%r10d - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - rorl $2,%r14d - addl %r10d,%ecx - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - xorl %r10d,%edi - addl %r12d,%r8d - rorl $6,%r13d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 48(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - xorl %r9d,%r15d - addl %r12d,%edx - rorl $6,%r13d - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - rorl $2,%r14d - addl %edx,%r11d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 52(%rsp),%ecx - movl %edx,%edi - xorl %ebx,%r12d - rorl $11,%r14d - xorl %r8d,%edi - addl %r12d,%ecx - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - rorl $2,%r14d - addl %ecx,%r10d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - xorl %ecx,%edi - addl %r12d,%eax - rorl $6,%r13d - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - movq 64+0(%rsp),%rdi - movl %r14d,%eax - - addl 0(%rdi),%eax - leaq 64(%rsi),%rsi - addl 4(%rdi),%ebx - addl 8(%rdi),%ecx - addl 12(%rdi),%edx - addl 16(%rdi),%r8d - addl 20(%rdi),%r9d - addl 24(%rdi),%r10d - addl 28(%rdi),%r11d - - cmpq 64+16(%rsp),%rsi - - movl %eax,0(%rdi) - movl %ebx,4(%rdi) - movl %ecx,8(%rdi) - movl %edx,12(%rdi) - movl %r8d,16(%rdi) - movl %r9d,20(%rdi) - movl %r10d,24(%rdi) - movl %r11d,28(%rdi) - jb .Lloop_ssse3 - - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -.Lepilogue_ssse3: - .byte 0xf3,0xc3 -.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 - .section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/aes-ssse3-x86.s b/lib/accelerated/x86/macosx/aes-ssse3-x86.s index 2f498875d6..33f33fd4f3 100644 --- a/lib/accelerated/x86/macosx/aes-ssse3-x86.s +++ b/lib/accelerated/x86/macosx/aes-ssse3-x86.s @@ -82,33 +82,33 @@ __vpaes_encrypt_core: movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 - pand %xmm6,%xmm0 movdqu (%edx),%xmm5 + psrld $4,%xmm1 + pand %xmm6,%xmm0 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 +.byte 102,15,56,0,193 pxor %xmm5,%xmm2 - psrld $4,%xmm1 + pxor %xmm2,%xmm0 addl $16,%edx -.byte 102,15,56,0,193 leal 192(%ebp),%ebx - pxor %xmm2,%xmm0 jmp L000enc_entry .align 4,0x90 L001enc_loop: movdqa 32(%ebp),%xmm4 - movdqa 48(%ebp),%xmm0 .byte 102,15,56,0,226 -.byte 102,15,56,0,195 pxor %xmm5,%xmm4 - movdqa 64(%ebp),%xmm5 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa -64(%ebx,%ecx,1),%xmm1 + movdqa 64(%ebp),%xmm5 .byte 102,15,56,0,234 + movdqa -64(%ebx,%ecx,1),%xmm1 movdqa 80(%ebp),%xmm2 - movdqa (%ebx,%ecx,1),%xmm4 .byte 102,15,56,0,211 - movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 + movdqa %xmm0,%xmm3 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 @@ -117,28 +117,28 @@ L001enc_loop: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx - subl $1,%eax pxor %xmm3,%xmm0 + subl $1,%eax L000enc_entry: movdqa %xmm6,%xmm1 - movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm5 .byte 102,15,56,0,232 - movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm7,%xmm4 pxor %xmm5,%xmm3 + movdqa %xmm7,%xmm4 .byte 102,15,56,0,224 - movdqa %xmm7,%xmm2 pxor %xmm5,%xmm4 + movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 -.byte 102,15,56,0,220 + movdqa %xmm7,%xmm3 movdqu (%edx),%xmm5 +.byte 102,15,56,0,220 pxor %xmm1,%xmm3 jnz L001enc_loop movdqa 96(%ebp),%xmm4 @@ -152,8 +152,8 @@ L000enc_entry: ret .align 4 __vpaes_decrypt_core: - leal 608(%ebp),%ebx movl 240(%edx),%eax + leal 608(%ebp),%ebx movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 @@ -176,56 +176,56 @@ __vpaes_decrypt_core: .align 4,0x90 L003dec_loop: movdqa -32(%ebx),%xmm4 - movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa -16(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,56,0,197 movdqa (%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 + subl $1,%eax +.byte 102,15,56,0,197 movdqa 32(%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 48(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 movdqa 64(%ebx),%xmm4 - pxor %xmm1,%xmm0 - movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,197 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa 80(%ebx),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - addl $16,%edx .byte 102,15,58,15,237,12 - pxor %xmm1,%xmm0 - subl $1,%eax L002dec_entry: movdqa %xmm6,%xmm1 - movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 - pand %xmm6,%xmm0 psrld $4,%xmm1 + pand %xmm6,%xmm0 + movdqa -32(%ebp),%xmm2 .byte 102,15,56,0,208 - movdqa %xmm7,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm7,%xmm4 pxor %xmm2,%xmm3 + movdqa %xmm7,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm7,%xmm3 pxor %xmm0,%xmm2 + movdqa %xmm7,%xmm3 .byte 102,15,56,0,220 - movdqu (%edx),%xmm0 pxor %xmm1,%xmm3 + movdqu (%edx),%xmm0 jnz L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 @@ -330,12 +330,12 @@ L013schedule_mangle_last_dec: ret .align 4 __vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm1 + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 pshufd $254,%xmm7,%xmm0 - pxor %xmm1,%xmm6 - pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 ret .align 4 @@ -588,8 +588,6 @@ L_vpaes_cbc_encrypt_begin: movl 24(%esp),%edi movl 28(%esp),%eax movl 32(%esp),%edx - subl $16,%eax - jc L020cbc_abort leal -56(%esp),%ebx movl 36(%esp),%ebp andl $-16,%ebx @@ -599,17 +597,18 @@ L_vpaes_cbc_encrypt_begin: subl %esi,%edi movl %ebx,48(%esp) movl %edi,(%esp) + subl $16,%eax movl %edx,4(%esp) movl %ebp,8(%esp) movl %eax,%edi - leal L_vpaes_consts+0x30-L021pic_point,%ebp + leal L_vpaes_consts+0x30-L020pic_point,%ebp call __vpaes_preheat -L021pic_point: +L020pic_point: cmpl $0,%ecx - je L022cbc_dec_loop - jmp L023cbc_enc_loop + je L021cbc_dec_loop + jmp L022cbc_enc_loop .align 4,0x90 -L023cbc_enc_loop: +L022cbc_enc_loop: movdqu (%esi),%xmm0 pxor %xmm1,%xmm0 call __vpaes_encrypt_core @@ -619,10 +618,10 @@ L023cbc_enc_loop: movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi - jnc L023cbc_enc_loop - jmp L024cbc_done + jnc L022cbc_enc_loop + jmp L023cbc_done .align 4,0x90 -L022cbc_dec_loop: +L021cbc_dec_loop: movdqu (%esi),%xmm0 movdqa %xmm1,16(%esp) movdqa %xmm0,32(%esp) @@ -634,16 +633,14 @@ L022cbc_dec_loop: movdqu %xmm0,(%ebx,%esi,1) leal 16(%esi),%esi subl $16,%edi - jnc L022cbc_dec_loop -L024cbc_done: + jnc L021cbc_dec_loop +L023cbc_done: movl 8(%esp),%ebx movl 48(%esp),%esp movdqu %xmm1,(%ebx) -L020cbc_abort: popl %edi popl %esi popl %ebx popl %ebp ret -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/aes-ssse3-x86_64.s b/lib/accelerated/x86/macosx/aes-ssse3-x86_64.s index b5c775a566..1fff24b967 100644 --- a/lib/accelerated/x86/macosx/aes-ssse3-x86_64.s +++ b/lib/accelerated/x86/macosx/aes-ssse3-x86_64.s @@ -43,8 +43,8 @@ _vpaes_encrypt_core: movdqa L$k_ipt+16(%rip),%xmm0 .byte 102,15,56,0,193 pxor %xmm5,%xmm2 - addq $16,%r9 pxor %xmm2,%xmm0 + addq $16,%r9 leaq L$k_mc_backward(%rip),%r10 jmp L$enc_entry @@ -52,19 +52,19 @@ _vpaes_encrypt_core: L$enc_loop: movdqa %xmm13,%xmm4 - movdqa %xmm12,%xmm0 .byte 102,15,56,0,226 -.byte 102,15,56,0,195 pxor %xmm5,%xmm4 - movdqa %xmm15,%xmm5 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm15,%xmm5 .byte 102,15,56,0,234 - movdqa (%r11,%r10,1),%xmm4 + movdqa -64(%r11,%r10,1),%xmm1 movdqa %xmm14,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm0,%xmm3 pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 .byte 102,15,56,0,193 addq $16,%r9 pxor %xmm2,%xmm0 @@ -73,30 +73,30 @@ L$enc_loop: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andq $48,%r11 - subq $1,%rax pxor %xmm3,%xmm0 + subq $1,%rax L$enc_entry: movdqa %xmm9,%xmm1 - movdqa %xmm11,%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 .byte 102,15,56,0,232 - movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm10,%xmm4 pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 - movdqa %xmm10,%xmm2 pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 -.byte 102,15,56,0,220 + movdqa %xmm10,%xmm3 movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 pxor %xmm1,%xmm3 jnz L$enc_loop @@ -149,61 +149,62 @@ L$dec_loop: movdqa -32(%r10),%xmm4 - movdqa -16(%r10),%xmm1 .byte 102,15,56,0,226 -.byte 102,15,56,0,203 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 0(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 16(%r10),%xmm1 + addq $16,%r9 -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 32(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 48(%r10),%xmm1 + subq $1,%rax -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - movdqa 64(%r10),%xmm4 - pxor %xmm1,%xmm0 - movdqa 80(%r10),%xmm1 -.byte 102,15,56,0,226 .byte 102,15,56,0,197 -.byte 102,15,56,0,203 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 pxor %xmm4,%xmm0 - addq $16,%r9 + .byte 102,15,58,15,237,12 - pxor %xmm1,%xmm0 - subq $1,%rax L$dec_entry: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 - movdqa %xmm11,%xmm2 psrld $4,%xmm1 pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 .byte 102,15,56,0,208 - movdqa %xmm10,%xmm3 pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,217 - movdqa %xmm10,%xmm4 pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - movdqa %xmm10,%xmm3 pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 .byte 102,15,56,0,220 - movdqu (%r9),%xmm0 pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 jnz L$dec_loop @@ -211,7 +212,7 @@ L$dec_entry: .byte 102,15,56,0,226 pxor %xmm0,%xmm4 movdqa 112(%r10),%xmm0 - movdqa -352(%r11),%xmm2 + movdqa L$k_sr-L$k_dsbd(%r11),%xmm2 .byte 102,15,56,0,195 pxor %xmm4,%xmm0 .byte 102,15,56,0,194 @@ -231,7 +232,7 @@ _vpaes_schedule_core: - call _vpaes_preheat + call _vpaes_preheat movdqa L$k_rcon(%rip),%xmm8 movdqu (%rdi),%xmm0 @@ -277,7 +278,7 @@ L$oop_schedule_128: call _vpaes_schedule_round decq %rsi jz L$schedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle jmp L$oop_schedule_128 @@ -298,7 +299,7 @@ L$oop_schedule_128: .p2align 4 L$schedule_192: movdqu 8(%rdi),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 @@ -307,13 +308,13 @@ L$schedule_192: L$oop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_192_smear - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_round decq %rsi jz L$schedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle call _vpaes_schedule_192_smear jmp L$oop_schedule_192 @@ -330,18 +331,18 @@ L$oop_schedule_192: .p2align 4 L$schedule_256: movdqu 16(%rdi),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movl $7,%esi L$oop_schedule_256: - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle movdqa %xmm0,%xmm6 call _vpaes_schedule_round decq %rsi jz L$schedule_mangle_last - call _vpaes_schedule_mangle + call _vpaes_schedule_mangle pshufd $255,%xmm0,%xmm0 @@ -379,7 +380,7 @@ L$schedule_mangle_last: L$schedule_mangle_last_dec: addq $-16,%rdx pxor L$k_s63(%rip),%xmm0 - call _vpaes_schedule_transform + call _vpaes_schedule_transform movdqu %xmm0,(%rdx) @@ -411,12 +412,12 @@ L$schedule_mangle_last_dec: .p2align 4 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm1 + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 pshufd $254,%xmm7,%xmm0 - pxor %xmm1,%xmm6 - pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 @@ -679,10 +680,9 @@ _vpaes_decrypt: .p2align 4 _vpaes_cbc_encrypt: xchgq %rcx,%rdx - subq $16,%rcx - jc L$cbc_abort movdqu (%r8),%xmm6 subq %rdi,%rsi + subq $16,%rcx call _vpaes_preheat cmpl $0,%r9d je L$cbc_dec_loop @@ -711,7 +711,6 @@ L$cbc_dec_loop: jnc L$cbc_dec_loop L$cbc_done: movdqu %xmm6,(%r8) -L$cbc_abort: .byte 0xf3,0xc3 @@ -838,4 +837,3 @@ L$k_dsbo: .p2align 6 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/aesni-x86.s b/lib/accelerated/x86/macosx/aesni-x86.s index 58d58d64c1..09ca1cbc5c 100644 --- a/lib/accelerated/x86/macosx/aesni-x86.s +++ b/lib/accelerated/x86/macosx/aesni-x86.s @@ -2145,4 +2145,3 @@ L100dec_key_ret: .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/aesni-x86_64.s b/lib/accelerated/x86/macosx/aesni-x86_64.s index 2cf61b487d..7089c7c9d6 100644 --- a/lib/accelerated/x86/macosx/aesni-x86_64.s +++ b/lib/accelerated/x86/macosx/aesni-x86_64.s @@ -53,7 +53,7 @@ L$oop_enc1_1: decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx - jnz L$oop_enc1_1 + jnz L$oop_enc1_1 .byte 102,15,56,221,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 @@ -74,7 +74,7 @@ L$oop_dec1_2: decl %eax movups (%rdx),%xmm1 leaq 16(%rdx),%rdx - jnz L$oop_dec1_2 + jnz L$oop_dec1_2 .byte 102,15,56,223,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 @@ -583,7 +583,7 @@ L$oop_enc1_3: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_enc1_3 + jnz L$oop_enc1_3 .byte 102,15,56,221,209 movups %xmm2,(%rsi) jmp L$ecb_ret @@ -728,7 +728,7 @@ L$oop_dec1_4: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_4 + jnz L$oop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) jmp L$ecb_ret @@ -857,7 +857,7 @@ L$oop_enc1_5: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_enc1_5 + jnz L$oop_enc1_5 .byte 102,15,56,221,209 movups (%rdi),%xmm8 paddq %xmm6,%xmm9 @@ -916,7 +916,7 @@ L$oop_enc1_6: decl %eax movups (%r11),%xmm1 leaq 16(%r11),%r11 - jnz L$oop_enc1_6 + jnz L$oop_enc1_6 .byte 102,15,56,221,217 movups %xmm3,(%r9) .byte 0xf3,0xc3 @@ -925,412 +925,199 @@ L$oop_enc1_6: .p2align 4 _aesni_ctr32_encrypt_blocks: - leaq (%rsp),%rax - pushq %rbp - subq $128,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp - cmpq $1,%rdx je L$ctr32_one_shortcut - movdqu (%r8),%xmm2 - movdqu (%rcx),%xmm0 - movl 12(%r8),%r8d - pxor %xmm0,%xmm2 - movl 12(%rcx),%r11d - movdqa %xmm2,0(%rsp) - bswapl %r8d - movdqa %xmm2,%xmm3 - movdqa %xmm2,%xmm4 - movdqa %xmm2,%xmm5 - movdqa %xmm2,64(%rsp) - movdqa %xmm2,80(%rsp) - movdqa %xmm2,96(%rsp) - movdqa %xmm2,112(%rsp) + movdqu (%r8),%xmm14 + movdqa L$bswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 movl 240(%rcx),%eax - - leaq 1(%r8),%r9 - leaq 2(%r8),%r10 - bswapl %r9d - bswapl %r10d - xorl %r11d,%r9d - xorl %r11d,%r10d -.byte 102,65,15,58,34,217,3 - leaq 3(%r8),%r9 - movdqa %xmm3,16(%rsp) -.byte 102,65,15,58,34,226,3 - bswapl %r9d - leaq 4(%r8),%r10 - movdqa %xmm4,32(%rsp) - xorl %r11d,%r9d bswapl %r10d -.byte 102,65,15,58,34,233,3 - xorl %r11d,%r10d - movdqa %xmm5,48(%rsp) - leaq 5(%r8),%r9 - movl %r10d,64+12(%rsp) - bswapl %r9d - leaq 6(%r8),%r10 - xorl %r11d,%r9d - bswapl %r10d - movl %r9d,80+12(%rsp) - xorl %r11d,%r10d - leaq 7(%r8),%r9 - movl %r10d,96+12(%rsp) - bswapl %r9d - xorl %r11d,%r9d - movl %r9d,112+12(%rsp) + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb L$ctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp L$ctr32_loop6 - movups 16(%rcx),%xmm1 +.p2align 4 +L$ctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 - movdqa 64(%rsp),%xmm6 - movdqa 80(%rsp),%xmm7 - cmpq $8,%rdx - jb L$ctr32_tail - leaq 128(%rcx),%rcx - subq $8,%rdx - jmp L$ctr32_loop8 -.p2align 5 -L$ctr32_loop8: - addl $8,%r8d - movdqa 96(%rsp),%xmm8 -.byte 102,15,56,220,209 - movl %r8d,%r9d - movdqa 112(%rsp),%xmm9 -.byte 102,15,56,220,217 - bswapl %r9d - movups 32-128(%rcx),%xmm0 -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,0+12(%rsp) - leaq 1(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 48-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,16+12(%rsp) - leaq 2(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 64-128(%rcx),%xmm0 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - bswapl %r9d -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,32+12(%rsp) - leaq 3(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 80-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,48+12(%rsp) - leaq 4(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 96-128(%rcx),%xmm0 -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 - bswapl %r9d -.byte 102,15,56,220,225 - xorl %r11d,%r9d -.byte 102,15,56,220,233 - movl %r9d,64+12(%rsp) - leaq 5(%r8),%r9 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 112-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,80+12(%rsp) - leaq 6(%r8),%r9 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 128-128(%rcx),%xmm0 + pxor %xmm0,%xmm3 .byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 .byte 102,15,56,220,217 - bswapl %r9d + movdqa L$increment32(%rip),%xmm13 + pxor %xmm0,%xmm5 .byte 102,15,56,220,225 - xorl %r11d,%r9d + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 .byte 102,15,56,220,233 - movl %r9d,96+12(%rsp) - leaq 7(%r8),%r9 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 144-128(%rcx),%xmm1 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 - bswapl %r9d -.byte 102,15,56,220,224 - xorl %r11d,%r9d -.byte 102,15,56,220,232 - movl %r9d,112+12(%rsp) -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 - movdqu 0(%rdi),%xmm10 -.byte 102,68,15,56,220,200 - movups 160-128(%rcx),%xmm0 - - cmpl $11,%eax - jb L$ctr32_enc_done - + jmp L$ctr32_enc_loop6_enter +.p2align 4 +L$ctr32_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 + decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 176-128(%rcx),%xmm1 - +L$ctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 .byte 102,15,56,220,208 .byte 102,15,56,220,216 + leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 192-128(%rcx),%xmm0 - je L$ctr32_enc_done - -.byte 102,15,56,220,209 -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups 208-128(%rcx),%xmm1 + movups (%rcx),%xmm0 + jnz L$ctr32_enc_loop6 -.byte 102,15,56,220,208 -.byte 102,15,56,220,216 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 -.byte 102,15,56,220,240 -.byte 102,15,56,220,248 -.byte 102,68,15,56,220,192 -.byte 102,68,15,56,220,200 - movups 224-128(%rcx),%xmm0 - -L$ctr32_enc_done: - movdqu 16(%rdi),%xmm11 - pxor %xmm0,%xmm10 - movdqu 32(%rdi),%xmm12 - pxor %xmm0,%xmm11 - movdqu 48(%rdi),%xmm13 - pxor %xmm0,%xmm12 - movdqu 64(%rdi),%xmm14 - pxor %xmm0,%xmm13 - movdqu 80(%rdi),%xmm15 - pxor %xmm0,%xmm14 .byte 102,15,56,220,209 - pxor %xmm0,%xmm15 + paddd %xmm13,%xmm12 .byte 102,15,56,220,217 + paddd -24(%rsp),%xmm13 .byte 102,15,56,220,225 + movdqa %xmm12,-40(%rsp) .byte 102,15,56,220,233 + movdqa %xmm13,-24(%rsp) .byte 102,15,56,220,241 +.byte 102,69,15,56,0,231 .byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movdqu 96(%rdi),%xmm1 - -.byte 102,65,15,56,221,210 - pxor %xmm0,%xmm1 - movdqu 112(%rdi),%xmm10 - leaq 128(%rdi),%rdi -.byte 102,65,15,56,221,219 - pxor %xmm0,%xmm10 - movdqa 0(%rsp),%xmm11 -.byte 102,65,15,56,221,228 - movdqa 16(%rsp),%xmm12 -.byte 102,65,15,56,221,237 - movdqa 32(%rsp),%xmm13 -.byte 102,65,15,56,221,246 - movdqa 48(%rsp),%xmm14 -.byte 102,65,15,56,221,255 - movdqa 64(%rsp),%xmm15 -.byte 102,68,15,56,221,193 - movdqa 80(%rsp),%xmm0 -.byte 102,69,15,56,221,202 - movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,0,239 - movups %xmm2,(%rsi) - movdqa %xmm11,%xmm2 - movups %xmm3,16(%rsi) - movdqa %xmm12,%xmm3 - movups %xmm4,32(%rsi) - movdqa %xmm13,%xmm4 - movups %xmm5,48(%rsi) - movdqa %xmm14,%xmm5 - movups %xmm6,64(%rsi) - movdqa %xmm15,%xmm6 - movups %xmm7,80(%rsi) - movdqa %xmm0,%xmm7 - movups %xmm8,96(%rsi) - movups %xmm9,112(%rsi) - leaq 128(%rsi),%rsi +.byte 102,15,56,221,208 + movups (%rdi),%xmm8 +.byte 102,15,56,221,216 + movups 16(%rdi),%xmm9 +.byte 102,15,56,221,224 + movups 32(%rdi),%xmm10 +.byte 102,15,56,221,232 + movups 48(%rdi),%xmm11 +.byte 102,15,56,221,240 + movups 64(%rdi),%xmm1 +.byte 102,15,56,221,248 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi - subq $8,%rdx - jnc L$ctr32_loop8 + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc L$ctr32_loop6 - addq $8,%rdx + addq $6,%rdx jz L$ctr32_done - leaq -128(%rcx),%rcx + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax L$ctr32_tail: - leaq 16(%rcx),%rcx - cmpq $4,%rdx - jb L$ctr32_loop3 - je L$ctr32_loop4 - - movdqa 96(%rsp),%xmm8 - pxor %xmm9,%xmm9 - - movups 16(%rcx),%xmm0 -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 - shrl $1,%eax -.byte 102,15,56,220,225 - decl %eax -.byte 102,15,56,220,233 - movups (%rdi),%xmm10 -.byte 102,15,56,220,241 - movups 16(%rdi),%xmm11 -.byte 102,15,56,220,249 - movups 32(%rdi),%xmm12 -.byte 102,68,15,56,220,193 - movups 16(%rcx),%xmm1 - - call L$enc_loop8_enter - - movdqu 48(%rdi),%xmm13 - pxor %xmm10,%xmm2 - movdqu 64(%rdi),%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm10,%xmm6 - movdqu %xmm5,48(%rsi) - movdqu %xmm6,64(%rsi) - cmpq $6,%rdx - jb L$ctr32_done - - movups 80(%rdi),%xmm11 - xorps %xmm11,%xmm7 - movups %xmm7,80(%rsi) - je L$ctr32_done - - movups 96(%rdi),%xmm12 - xorps %xmm12,%xmm8 - movups %xmm8,96(%rsi) - jmp L$ctr32_done + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb L$ctr32_one -.p2align 5 -L$ctr32_loop4: -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - movups (%rcx),%xmm1 - decl %eax - jnz L$ctr32_loop4 -.byte 102,15,56,221,209 - movups (%rdi),%xmm10 -.byte 102,15,56,221,217 - movups 16(%rdi),%xmm11 -.byte 102,15,56,221,225 - movups 32(%rdi),%xmm12 -.byte 102,15,56,221,233 - movups 48(%rdi),%xmm13 + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je L$ctr32_two - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - xorps %xmm11,%xmm3 - movups %xmm3,16(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm4,32(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm5,48(%rsi) - jmp L$ctr32_done + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb L$ctr32_three -.p2align 5 -L$ctr32_loop3: -.byte 102,15,56,220,209 - leaq 16(%rcx),%rcx -.byte 102,15,56,220,217 -.byte 102,15,56,220,225 - movups (%rcx),%xmm1 - decl %eax - jnz L$ctr32_loop3 -.byte 102,15,56,221,209 -.byte 102,15,56,221,217 -.byte 102,15,56,221,225 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je L$ctr32_four - movups (%rdi),%xmm10 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - cmpq $2,%rdx - jb L$ctr32_done + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 - movups 16(%rdi),%xmm11 - xorps %xmm11,%xmm3 - movups %xmm3,16(%rsi) - je L$ctr32_done + call _aesni_encrypt6 - movups 32(%rdi),%xmm12 - xorps %xmm12,%xmm4 - movups %xmm4,32(%rsi) + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) jmp L$ctr32_done .p2align 4 L$ctr32_one_shortcut: movups (%r8),%xmm2 - movups (%rdi),%xmm10 + movups (%rdi),%xmm8 movl 240(%rcx),%eax +L$ctr32_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -1340,28 +1127,53 @@ L$oop_enc1_7: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_enc1_7 + jnz L$oop_enc1_7 .byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) jmp L$ctr32_done .p2align 4 +L$ctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + L$ctr32_done: - leaq (%rbp),%rsp - popq %rbp -L$ctr32_epilogue: .byte 0xf3,0xc3 .globl _aesni_xts_encrypt .p2align 4 _aesni_xts_encrypt: - leaq (%rsp),%rax - pushq %rbp - subq $112,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp + leaq -104(%rsp),%rsp movups (%r9),%xmm15 movl 240(%r8),%eax movl 240(%rcx),%r10d @@ -1374,268 +1186,230 @@ L$oop_enc1_8: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz L$oop_enc1_8 + jnz L$oop_enc1_8 .byte 102,68,15,56,221,249 - movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax - shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx - movups 16(%rcx,%r10,1),%xmm1 - movl %eax,%r10d - movdqa L$xts_magic(%rip),%xmm8 - pshufd $95,%xmm15,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm10 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm11 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm12 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - subq $96,%rdx jc L$xts_enc_short shrl $1,%eax - subl $3,%eax - movups 16(%r11),%xmm1 + subl $1,%eax movl %eax,%r10d - leaq L$xts_magic(%rip),%r8 jmp L$xts_enc_grandloop -.p2align 5 +.p2align 4 L$xts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,220,209 + pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,220,217 + pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,220,225 + pxor %xmm12,%xmm4 movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,220,233 - movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 - pxor %xmm9,%xmm10 -.byte 102,15,56,220,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,220,249 - movups 48(%r11),%xmm1 -.byte 102,15,56,220,208 - pxor %xmm9,%xmm12 + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 movdqa %xmm11,16(%rsp) -.byte 102,15,56,220,216 - pxor %xmm9,%xmm13 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 movdqa %xmm12,32(%rsp) -.byte 102,15,56,220,224 - pxor %xmm9,%xmm14 -.byte 102,15,56,220,232 - pxor %xmm9,%xmm8 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,220,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax movdqa %xmm14,64(%rsp) -.byte 102,15,56,220,240 - movdqa %xmm8,80(%rsp) -.byte 102,15,56,220,248 - movups 64(%r11),%xmm0 - leaq 64(%r11),%rcx - pshufd $95,%xmm15,%xmm9 - jmp L$xts_enc_loop6 -.p2align 5 +.byte 102,15,56,220,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,220,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp L$xts_enc_loop6_enter + +.p2align 4 L$xts_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 + decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 +L$xts_enc_loop6_enter: movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - .byte 102,15,56,220,208 .byte 102,15,56,220,216 + leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups (%rcx),%xmm0 - decl %eax jnz L$xts_enc_loop6 - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 paddq %xmm15,%xmm15 - psrad $31,%xmm14 +.byte 102,15,56,220,209 + pand %xmm8,%xmm9 .byte 102,15,56,220,217 - pand %xmm8,%xmm14 - movups (%r11),%xmm10 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,225 + pxor %xmm9,%xmm15 .byte 102,15,56,220,233 - pxor %xmm14,%xmm15 .byte 102,15,56,220,241 - movaps %xmm10,%xmm11 .byte 102,15,56,220,249 movups 16(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 .byte 102,15,56,220,208 - pxor %xmm15,%xmm10 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,224 + pxor %xmm9,%xmm15 .byte 102,15,56,220,232 - pxor %xmm14,%xmm15 .byte 102,15,56,220,240 - movaps %xmm11,%xmm12 .byte 102,15,56,220,248 movups 32(%rcx),%xmm0 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 .byte 102,15,56,220,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,220,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,220,225 - movdqa %xmm13,48(%rsp) + pxor %xmm9,%xmm15 .byte 102,15,56,220,233 - pxor %xmm14,%xmm15 .byte 102,15,56,220,241 - movaps %xmm12,%xmm13 .byte 102,15,56,220,249 - movups 48(%rcx),%xmm1 - - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,220,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,220,224 -.byte 102,15,56,220,232 - pxor %xmm14,%xmm15 -.byte 102,15,56,220,240 - movaps %xmm13,%xmm14 -.byte 102,15,56,220,248 - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,220,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,220,217 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 - pxor %xmm0,%xmm15 - movups (%r11),%xmm0 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups 16(%r11),%xmm1 +.byte 102,15,56,221,208 + pand %xmm8,%xmm9 +.byte 102,15,56,221,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,221,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 - pxor %xmm15,%xmm14 - psrad $31,%xmm9 -.byte 102,15,56,221,84,36,0 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 pand %xmm8,%xmm9 -.byte 102,15,56,221,92,36,16 -.byte 102,15,56,221,100,36,32 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 -.byte 102,15,56,221,108,36,48 -.byte 102,15,56,221,116,36,64 -.byte 102,15,56,221,124,36,80 - movl %r10d,%eax + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) subq $96,%rdx jnc L$xts_enc_grandloop - leal 7(%rax,%rax,1),%eax + leal 3(%rax,%rax,1),%eax movq %r11,%rcx movl %eax,%r10d L$xts_enc_short: - pxor %xmm0,%xmm10 addq $96,%rdx jz L$xts_enc_done - pxor %xmm0,%xmm11 cmpq $32,%rdx jb L$xts_enc_one - pxor %xmm0,%xmm12 je L$xts_enc_two - pxor %xmm0,%xmm13 cmpq $64,%rdx jb L$xts_enc_three - pxor %xmm0,%xmm14 je L$xts_enc_four + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1676,7 +1450,7 @@ L$oop_enc1_9: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_enc1_9 + jnz L$oop_enc1_9 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1738,15 +1512,15 @@ L$xts_enc_four: call _aesni_encrypt4 - pxor %xmm10,%xmm2 - movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp L$xts_enc_done @@ -1781,14 +1555,13 @@ L$oop_enc1_10: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_enc1_10 + jnz L$oop_enc1_10 .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movups %xmm2,-16(%rsi) L$xts_enc_ret: - leaq (%rbp),%rsp - popq %rbp + leaq 104(%rsp),%rsp L$xts_enc_epilogue: .byte 0xf3,0xc3 @@ -1796,11 +1569,7 @@ L$xts_enc_epilogue: .p2align 4 _aesni_xts_decrypt: - leaq (%rsp),%rax - pushq %rbp - subq $112,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp + leaq -104(%rsp),%rsp movups (%r9),%xmm15 movl 240(%r8),%eax movl 240(%rcx),%r10d @@ -1813,7 +1582,7 @@ L$oop_enc1_11: decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 - jnz L$oop_enc1_11 + jnz L$oop_enc1_11 .byte 102,68,15,56,221,249 xorl %eax,%eax testq $15,%rdx @@ -1821,266 +1590,228 @@ L$oop_enc1_11: shlq $4,%rax subq %rax,%rdx - movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax - shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx - movups 16(%rcx,%r10,1),%xmm1 - movl %eax,%r10d - movdqa L$xts_magic(%rip),%xmm8 - pshufd $95,%xmm15,%xmm9 - pxor %xmm0,%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm10 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm10 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm11 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm11 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm12 - psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm12 - pxor %xmm14,%xmm15 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 movdqa %xmm15,%xmm13 - psrad $31,%xmm14 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 - pxor %xmm0,%xmm13 - pxor %xmm14,%xmm15 - movdqa %xmm15,%xmm14 - psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pxor %xmm0,%xmm14 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 - movaps %xmm1,96(%rsp) - subq $96,%rdx jc L$xts_dec_short shrl $1,%eax - subl $3,%eax - movups 16(%r11),%xmm1 + subl $1,%eax movl %eax,%r10d - leaq L$xts_magic(%rip),%r8 jmp L$xts_dec_grandloop -.p2align 5 +.p2align 4 L$xts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - movdqa %xmm0,%xmm8 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm10,%xmm2 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 - pxor %xmm11,%xmm3 -.byte 102,15,56,222,209 + pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 - pxor %xmm12,%xmm4 -.byte 102,15,56,222,217 + pxor %xmm11,%xmm3 movdqu 64(%rdi),%xmm6 - pxor %xmm13,%xmm5 -.byte 102,15,56,222,225 + pxor %xmm12,%xmm4 movdqu 80(%rdi),%xmm7 - pxor %xmm15,%xmm8 - movdqa 96(%rsp),%xmm9 - pxor %xmm14,%xmm6 -.byte 102,15,56,222,233 - movups 32(%r11),%xmm0 leaq 96(%rdi),%rdi - pxor %xmm8,%xmm7 + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 - pxor %xmm9,%xmm10 -.byte 102,15,56,222,241 - pxor %xmm9,%xmm11 - movdqa %xmm10,0(%rsp) -.byte 102,15,56,222,249 - movups 48(%r11),%xmm1 -.byte 102,15,56,222,208 - pxor %xmm9,%xmm12 + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,209 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 movdqa %xmm11,16(%rsp) -.byte 102,15,56,222,216 - pxor %xmm9,%xmm13 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 movdqa %xmm12,32(%rsp) -.byte 102,15,56,222,224 - pxor %xmm9,%xmm14 -.byte 102,15,56,222,232 - pxor %xmm9,%xmm8 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) +.byte 102,15,56,222,233 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax movdqa %xmm14,64(%rsp) -.byte 102,15,56,222,240 - movdqa %xmm8,80(%rsp) -.byte 102,15,56,222,248 - movups 64(%r11),%xmm0 - leaq 64(%r11),%rcx - pshufd $95,%xmm15,%xmm9 - jmp L$xts_dec_loop6 -.p2align 5 +.byte 102,15,56,222,241 + movdqa %xmm15,80(%rsp) +.byte 102,15,56,222,249 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp L$xts_dec_loop6_enter + +.p2align 4 L$xts_dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 + decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 +L$xts_dec_loop6_enter: movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - .byte 102,15,56,222,208 .byte 102,15,56,222,216 + leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups (%rcx),%xmm0 - decl %eax jnz L$xts_dec_loop6 - movdqa (%r8),%xmm8 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 paddq %xmm15,%xmm15 - psrad $31,%xmm14 +.byte 102,15,56,222,209 + pand %xmm8,%xmm9 .byte 102,15,56,222,217 - pand %xmm8,%xmm14 - movups (%r11),%xmm10 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,225 + pxor %xmm9,%xmm15 .byte 102,15,56,222,233 - pxor %xmm14,%xmm15 .byte 102,15,56,222,241 - movaps %xmm10,%xmm11 .byte 102,15,56,222,249 movups 16(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 .byte 102,15,56,222,208 - pxor %xmm15,%xmm10 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,222,216 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,224 + pxor %xmm9,%xmm15 .byte 102,15,56,222,232 - pxor %xmm14,%xmm15 .byte 102,15,56,222,240 - movaps %xmm11,%xmm12 .byte 102,15,56,222,248 movups 32(%rcx),%xmm0 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 .byte 102,15,56,222,209 - pxor %xmm15,%xmm11 - psrad $31,%xmm14 + pand %xmm8,%xmm9 .byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 + pcmpgtd %xmm15,%xmm14 .byte 102,15,56,222,225 - movdqa %xmm13,48(%rsp) + pxor %xmm9,%xmm15 .byte 102,15,56,222,233 - pxor %xmm14,%xmm15 .byte 102,15,56,222,241 - movaps %xmm12,%xmm13 .byte 102,15,56,222,249 - movups 48(%rcx),%xmm1 - movdqa %xmm9,%xmm14 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,208 - pxor %xmm15,%xmm12 - psrad $31,%xmm14 -.byte 102,15,56,222,216 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 paddq %xmm15,%xmm15 - pand %xmm8,%xmm14 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 - pxor %xmm14,%xmm15 -.byte 102,15,56,222,240 - movaps %xmm13,%xmm14 -.byte 102,15,56,222,248 - - movdqa %xmm9,%xmm0 - paddd %xmm9,%xmm9 -.byte 102,15,56,222,209 - pxor %xmm15,%xmm13 - psrad $31,%xmm0 -.byte 102,15,56,222,217 - paddq %xmm15,%xmm15 - pand %xmm8,%xmm0 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 - pxor %xmm0,%xmm15 - movups (%r11),%xmm0 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups 16(%r11),%xmm1 +.byte 102,15,56,223,208 + pand %xmm8,%xmm9 +.byte 102,15,56,223,216 + pcmpgtd %xmm15,%xmm14 +.byte 102,15,56,223,224 + pxor %xmm9,%xmm15 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 - pxor %xmm15,%xmm14 - psrad $31,%xmm9 -.byte 102,15,56,223,84,36,0 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 pand %xmm8,%xmm9 -.byte 102,15,56,223,92,36,16 -.byte 102,15,56,223,100,36,32 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 pxor %xmm9,%xmm15 -.byte 102,15,56,223,108,36,48 -.byte 102,15,56,223,116,36,64 -.byte 102,15,56,223,124,36,80 - movl %r10d,%eax + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi - movups %xmm2,-96(%rsi) - movups %xmm3,-80(%rsi) - movups %xmm4,-64(%rsi) - movups %xmm5,-48(%rsi) - movups %xmm6,-32(%rsi) - movups %xmm7,-16(%rsi) subq $96,%rdx jnc L$xts_dec_grandloop - leal 7(%rax,%rax,1),%eax + leal 3(%rax,%rax,1),%eax movq %r11,%rcx movl %eax,%r10d L$xts_dec_short: - pxor %xmm0,%xmm10 - pxor %xmm0,%xmm11 addq $96,%rdx jz L$xts_dec_done - pxor %xmm0,%xmm12 cmpq $32,%rdx jb L$xts_dec_one - pxor %xmm0,%xmm13 je L$xts_dec_two - pxor %xmm0,%xmm14 cmpq $64,%rdx jb L$xts_dec_three je L$xts_dec_four + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -2130,7 +1861,7 @@ L$oop_dec1_12: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_12 + jnz L$oop_dec1_12 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -2173,7 +1904,7 @@ L$xts_dec_three: xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 - movdqa %xmm14,%xmm11 + movdqa %xmm15,%xmm11 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -2183,8 +1914,14 @@ L$xts_dec_three: .p2align 4 L$xts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 movups (%rdi),%xmm2 + pand %xmm8,%xmm9 movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 @@ -2195,16 +1932,16 @@ L$xts_dec_four: call _aesni_decrypt4 - pxor %xmm10,%xmm2 + xorps %xmm10,%xmm2 movdqa %xmm14,%xmm10 - pxor %xmm11,%xmm3 + xorps %xmm11,%xmm3 movdqa %xmm15,%xmm11 - pxor %xmm12,%xmm4 - movdqu %xmm2,(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm3,16(%rsi) - movdqu %xmm4,32(%rsi) - movdqu %xmm5,48(%rsi) + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp L$xts_dec_done @@ -2228,7 +1965,7 @@ L$oop_dec1_13: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_13 + jnz L$oop_dec1_13 .byte 102,15,56,223,209 xorps %xmm11,%xmm2 movups %xmm2,(%rsi) @@ -2258,14 +1995,13 @@ L$oop_dec1_14: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_14 + jnz L$oop_dec1_14 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) L$xts_dec_ret: - leaq (%rbp),%rsp - popq %rbp + leaq 104(%rsp),%rsp L$xts_dec_epilogue: .byte 0xf3,0xc3 @@ -2302,7 +2038,7 @@ L$oop_enc1_15: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_enc1_15 + jnz L$oop_enc1_15 .byte 102,15,56,221,209 movl %r10d,%eax movq %r11,%rcx @@ -2318,338 +2054,163 @@ L$oop_enc1_15: L$cbc_enc_tail: movq %rdx,%rcx xchgq %rdi,%rsi -.long 0x9066A4F3 +.long 0x9066A4F3 movl $16,%ecx subq %rdx,%rcx xorl %eax,%eax -.long 0x9066AAF3 +.long 0x9066AAF3 leaq -16(%rdi),%rdi movl %r10d,%eax movq %rdi,%rsi movq %r11,%rcx xorq %rdx,%rdx - jmp L$cbc_enc_loop + jmp L$cbc_enc_loop .p2align 4 L$cbc_decrypt: - leaq (%rsp),%rax - pushq %rbp - subq $16,%rsp - andq $-16,%rsp - leaq -8(%rax),%rbp - movups (%r8),%xmm10 + movups (%r8),%xmm9 movl %r10d,%eax - cmpq $80,%rdx - jbe L$cbc_dec_tail - - movups (%rcx),%xmm0 - movdqu 0(%rdi),%xmm2 - movdqu 16(%rdi),%xmm3 - movdqa %xmm2,%xmm11 - movdqu 32(%rdi),%xmm4 - movdqa %xmm3,%xmm12 - movdqu 48(%rdi),%xmm5 - movdqa %xmm4,%xmm13 - movdqu 64(%rdi),%xmm6 - movdqa %xmm5,%xmm14 - movdqu 80(%rdi),%xmm7 - movdqa %xmm6,%xmm15 cmpq $112,%rdx - jbe L$cbc_dec_six_or_seven - + jbe L$cbc_dec_tail + shrl $1,%r10d subq $112,%rdx - leaq 112(%rcx),%rcx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) jmp L$cbc_dec_loop8_enter .p2align 4 L$cbc_dec_loop8: + movaps %xmm0,-24(%rsp) movups %xmm9,(%rsi) leaq 16(%rsi),%rsi L$cbc_dec_loop8_enter: - movdqu 96(%rdi),%xmm8 - pxor %xmm0,%xmm2 - movdqu 112(%rdi),%xmm9 - pxor %xmm0,%xmm3 - movups 16-112(%rcx),%xmm1 - pxor %xmm0,%xmm4 - xorq %r11,%r11 - cmpq $112,%rdx - pxor %xmm0,%xmm5 - pxor %xmm0,%xmm6 - pxor %xmm0,%xmm7 - pxor %xmm0,%xmm8 + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 .byte 102,15,56,222,209 - pxor %xmm0,%xmm9 - movups 32-112(%rcx),%xmm0 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - setnc %r11b -.byte 102,68,15,56,222,193 - shlq $7,%r11 -.byte 102,68,15,56,222,201 - addq %rdi,%r11 - movups 48-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 64-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 80-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 96-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 112-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 128-112(%rcx),%xmm0 -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 144-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 160-112(%rcx),%xmm0 - cmpl $11,%eax - jb L$cbc_dec_done -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 176-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 192-112(%rcx),%xmm0 - je L$cbc_dec_done -.byte 102,15,56,222,209 -.byte 102,15,56,222,217 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups 208-112(%rcx),%xmm1 -.byte 102,15,56,222,208 -.byte 102,15,56,222,216 -.byte 102,15,56,222,224 -.byte 102,15,56,222,232 -.byte 102,15,56,222,240 -.byte 102,15,56,222,248 -.byte 102,68,15,56,222,192 -.byte 102,68,15,56,222,200 - movups 224-112(%rcx),%xmm0 -L$cbc_dec_done: -.byte 102,15,56,222,209 - pxor %xmm0,%xmm10 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 .byte 102,15,56,222,217 - pxor %xmm0,%xmm11 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 .byte 102,15,56,222,225 - pxor %xmm0,%xmm12 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 .byte 102,15,56,222,233 - pxor %xmm0,%xmm13 + pxor %xmm0,%xmm7 + decl %eax .byte 102,15,56,222,241 - pxor %xmm0,%xmm14 + pxor %xmm0,%xmm8 .byte 102,15,56,222,249 - pxor %xmm0,%xmm15 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movdqu 80(%rdi),%xmm1 - -.byte 102,65,15,56,223,210 - movdqu 96(%rdi),%xmm10 - pxor %xmm0,%xmm1 -.byte 102,65,15,56,223,219 - pxor %xmm0,%xmm10 - movdqu 112(%rdi),%xmm0 - leaq 128(%rdi),%rdi -.byte 102,65,15,56,223,228 - movdqu 0(%r11),%xmm11 -.byte 102,65,15,56,223,237 - movdqu 16(%r11),%xmm12 -.byte 102,65,15,56,223,246 - movdqu 32(%r11),%xmm13 -.byte 102,65,15,56,223,255 - movdqu 48(%r11),%xmm14 -.byte 102,68,15,56,223,193 - movdqu 64(%r11),%xmm15 -.byte 102,69,15,56,223,202 - movdqa %xmm0,%xmm10 - movdqu 80(%r11),%xmm1 - movups -112(%rcx),%xmm0 + movups 16(%rcx),%xmm1 + call L$dec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 movups %xmm2,(%rsi) - movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) - movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) - movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) - movdqa %xmm14,%xmm5 + movl %r10d,%eax movups %xmm6,64(%rsi) - movdqa %xmm15,%xmm6 + movq %r11,%rcx movups %xmm7,80(%rsi) - movdqa %xmm1,%xmm7 + leaq 128(%rdi),%rdi movups %xmm8,96(%rsi) leaq 112(%rsi),%rsi - subq $128,%rdx ja L$cbc_dec_loop8 movaps %xmm9,%xmm2 - leaq -112(%rcx),%rcx + movaps %xmm0,%xmm9 addq $112,%rdx jle L$cbc_dec_tail_collected - movups %xmm9,(%rsi) + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax leaq 16(%rsi),%rsi - cmpq $80,%rdx - jbe L$cbc_dec_tail - - movaps %xmm11,%xmm2 -L$cbc_dec_six_or_seven: - cmpq $96,%rdx - ja L$cbc_dec_seven - - movaps %xmm7,%xmm8 - call _aesni_decrypt6 - pxor %xmm10,%xmm2 - movaps %xmm8,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - pxor %xmm15,%xmm7 - movdqu %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - movdqa %xmm7,%xmm2 - jmp L$cbc_dec_tail_collected - -.p2align 4 -L$cbc_dec_seven: - movups 96(%rdi),%xmm8 - xorps %xmm9,%xmm9 - call _aesni_decrypt8 - movups 80(%rdi),%xmm9 - pxor %xmm10,%xmm2 - movups 96(%rdi),%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - pxor %xmm15,%xmm7 - movdqu %xmm6,64(%rsi) - pxor %xmm9,%xmm8 - movdqu %xmm7,80(%rsi) - leaq 96(%rsi),%rsi - movdqa %xmm8,%xmm2 - jmp L$cbc_dec_tail_collected - L$cbc_dec_tail: movups (%rdi),%xmm2 - subq $16,%rdx + movaps %xmm2,%xmm8 + cmpq $16,%rdx jbe L$cbc_dec_one movups 16(%rdi),%xmm3 - movaps %xmm2,%xmm11 - subq $16,%rdx + movaps %xmm3,%xmm7 + cmpq $32,%rdx jbe L$cbc_dec_two movups 32(%rdi),%xmm4 - movaps %xmm3,%xmm12 - subq $16,%rdx + movaps %xmm4,%xmm6 + cmpq $48,%rdx jbe L$cbc_dec_three movups 48(%rdi),%xmm5 - movaps %xmm4,%xmm13 - subq $16,%rdx + cmpq $64,%rdx jbe L$cbc_dec_four movups 64(%rdi),%xmm6 - movaps %xmm5,%xmm14 - movaps %xmm6,%xmm15 - xorps %xmm7,%xmm7 - call _aesni_decrypt6 - pxor %xmm10,%xmm2 - movaps %xmm15,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - pxor %xmm14,%xmm6 - movdqu %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - movdqa %xmm6,%xmm2 - subq $16,%rdx - jmp L$cbc_dec_tail_collected + cmpq $80,%rdx + jbe L$cbc_dec_five + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe L$cbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_one: - movaps %xmm2,%xmm11 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -2659,71 +2220,113 @@ L$oop_dec1_16: decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_16 + jnz L$oop_dec1_16 .byte 102,15,56,223,209 - xorps %xmm10,%xmm2 - movaps %xmm11,%xmm10 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_two: - movaps %xmm3,%xmm12 xorps %xmm4,%xmm4 call _aesni_decrypt3 - pxor %xmm10,%xmm2 - movaps %xmm12,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - movdqa %xmm3,%xmm2 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 leaq 16(%rsi),%rsi + subq $32,%rdx jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_three: - movaps %xmm4,%xmm13 call _aesni_decrypt3 - pxor %xmm10,%xmm2 - movaps %xmm13,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - movdqa %xmm4,%xmm2 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 leaq 32(%rsi),%rsi + subq $48,%rdx jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_four: - movaps %xmm5,%xmm14 call _aesni_decrypt4 - pxor %xmm10,%xmm2 - movaps %xmm14,%xmm10 - pxor %xmm11,%xmm3 - movdqu %xmm2,(%rsi) - pxor %xmm12,%xmm4 - movdqu %xmm3,16(%rsi) - pxor %xmm13,%xmm5 - movdqu %xmm4,32(%rsi) - movdqa %xmm5,%xmm2 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 leaq 48(%rsi),%rsi + subq $64,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx jmp L$cbc_dec_tail_collected - .p2align 4 L$cbc_dec_tail_collected: - movups %xmm10,(%r8) andq $15,%rdx + movups %xmm9,(%r8) jnz L$cbc_dec_tail_partial movups %xmm2,(%rsi) jmp L$cbc_dec_ret .p2align 4 L$cbc_dec_tail_partial: - movaps %xmm2,(%rsp) + movaps %xmm2,-24(%rsp) movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx - leaq (%rsp),%rsi -.long 0x9066A4F3 + leaq -24(%rsp),%rsi +.long 0x9066A4F3 L$cbc_dec_ret: - leaq (%rbp),%rsp - popq %rbp L$cbc_ret: .byte 0xf3,0xc3 @@ -2731,7 +2334,7 @@ L$cbc_ret: .p2align 4 _aesni_set_decrypt_key: -.byte 0x48,0x83,0xEC,0x08 +.byte 0x48,0x83,0xEC,0x08 call __aesni_set_encrypt_key shll $4,%esi testl %eax,%eax @@ -2770,7 +2373,7 @@ L$SEH_end_set_decrypt_key: .p2align 4 _aesni_set_encrypt_key: __aesni_set_encrypt_key: -.byte 0x48,0x83,0xEC,0x08 +.byte 0x48,0x83,0xEC,0x08 movq $-1,%rax testq %rdi,%rdi jz L$enc_key_ret @@ -2966,10 +2569,7 @@ L$increment64: .long 1,0,0,0 L$xts_magic: .long 0x87,0,1,0 -L$increment1: -.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/cpuid-x86.s b/lib/accelerated/x86/macosx/cpuid-x86.s index 874dca497d..beacbd8525 100644 --- a/lib/accelerated/x86/macosx/cpuid-x86.s +++ b/lib/accelerated/x86/macosx/cpuid-x86.s @@ -68,4 +68,3 @@ L_gnutls_have_cpuid_begin: ret .byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/cpuid-x86_64.s b/lib/accelerated/x86/macosx/cpuid-x86_64.s index e98a630f57..339883ff3d 100644 --- a/lib/accelerated/x86/macosx/cpuid-x86_64.s +++ b/lib/accelerated/x86/macosx/cpuid-x86_64.s @@ -56,4 +56,3 @@ _gnutls_cpuid: .byte 0xf3,0xc3 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/e_padlock-x86.s b/lib/accelerated/x86/macosx/e_padlock-x86.s index 049c730287..3ba69bd352 100644 --- a/lib/accelerated/x86/macosx/e_padlock-x86.s +++ b/lib/accelerated/x86/macosx/e_padlock-x86.s @@ -174,14 +174,16 @@ L005ecb_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx + cmpl $128,%ecx + jbe L006ecb_short testl $32,(%edx) - jnz L006ecb_aligned + jnz L007ecb_aligned testl $15,%edi setz %al testl $15,%esi setz %bl testl %ebx,%eax - jnz L006ecb_aligned + jnz L007ecb_aligned negl %eax movl $512,%ebx notl %eax @@ -193,28 +195,10 @@ L005ecb_pic_point: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - cmpl %ebx,%ecx - ja L007ecb_loop - movl %esi,%eax - cmpl %esp,%ebp - cmovel %edi,%eax - addl %ecx,%eax - negl %eax - andl $4095,%eax - cmpl $128,%eax - movl $-128,%eax - cmovael %ebx,%eax - andl %eax,%ebx - jz L008ecb_unaligned_tail - jmp L007ecb_loop + jmp L008ecb_loop .align 4,0x90 -L007ecb_loop: +L008ecb_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -239,8 +223,8 @@ L009ecb_inp_aligned: testl $15,%edi jz L010ecb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi L010ecb_out_aligned: @@ -250,75 +234,43 @@ L010ecb_out_aligned: addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jz L011ecb_break - cmpl %ebx,%ecx - jae L007ecb_loop -L008ecb_unaligned_tail: - xorl %eax,%eax + jnz L008ecb_loop cmpl %ebp,%esp - cmovel %ecx,%eax - subl %eax,%esp - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp L007ecb_loop -.align 4,0x90 -L011ecb_break: - cmpl %ebp,%esp - je L012ecb_done + je L011ecb_done pxor %xmm0,%xmm0 leal (%esp),%eax -L013ecb_bzero: +L012ecb_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja L013ecb_bzero -L012ecb_done: - movl 16(%ebp),%ebp + ja L012ecb_bzero +L011ecb_done: leal 24(%ebp),%esp - jmp L014ecb_exit + jmp L013ecb_exit .align 4,0x90 -L006ecb_aligned: - leal (%esi,%ecx,1),%ebp - negl %ebp - andl $4095,%ebp +L006ecb_short: xorl %eax,%eax - cmpl $128,%ebp - movl $127,%ebp - cmovael %eax,%ebp - andl %ecx,%ebp - subl %ebp,%ecx - jz L015ecb_aligned_tail + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +L014ecb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja L014ecb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp L008ecb_loop +.align 4,0x90 +L007ecb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,200 - testl %ebp,%ebp - jz L014ecb_exit -L015ecb_aligned_tail: - movl %ebp,%ecx - leal -24(%esp),%ebp - movl %ebp,%esp - movl %ebp,%eax - subl %ecx,%esp - andl $-16,%ebp - andl $-16,%esp - movl %eax,16(%ebp) - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp L007ecb_loop -L014ecb_exit: +L013ecb_exit: movl $1,%eax leal 4(%esp),%esp L004ecb_abort: @@ -340,17 +292,19 @@ L_padlock_cbc_encrypt_begin: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz L016cbc_abort + jnz L015cbc_abort testl $15,%ecx - jnz L016cbc_abort - leal Lpadlock_saved_context-L017cbc_pic_point,%eax + jnz L015cbc_abort + leal Lpadlock_saved_context-L016cbc_pic_point,%eax pushfl cld call __padlock_verify_ctx -L017cbc_pic_point: +L016cbc_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx + cmpl $64,%ecx + jbe L017cbc_short testl $32,(%edx) jnz L018cbc_aligned testl $15,%edi @@ -370,25 +324,7 @@ L017cbc_pic_point: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - cmpl %ebx,%ecx - ja L019cbc_loop - movl %esi,%eax - cmpl %esp,%ebp - cmovel %edi,%eax - addl %ecx,%eax - negl %eax - andl $4095,%eax - cmpl $64,%eax - movl $-64,%eax - cmovael %ebx,%eax - andl %eax,%ebx - jz L020cbc_unaligned_tail jmp L019cbc_loop .align 4,0x90 L019cbc_loop: @@ -400,13 +336,13 @@ L019cbc_loop: testl $15,%edi cmovnzl %esp,%edi testl $15,%esi - jz L021cbc_inp_aligned + jz L020cbc_inp_aligned shrl $2,%ecx .byte 243,165 subl %ebx,%edi movl %ebx,%ecx movl %edi,%esi -L021cbc_inp_aligned: +L020cbc_inp_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx @@ -416,93 +352,61 @@ L021cbc_inp_aligned: movl (%ebp),%edi movl 12(%ebp),%ebx testl $15,%edi - jz L022cbc_out_aligned + jz L021cbc_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi -L022cbc_out_aligned: +L021cbc_out_aligned: movl 4(%ebp),%esi movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jz L023cbc_break - cmpl %ebx,%ecx - jae L019cbc_loop -L020cbc_unaligned_tail: - xorl %eax,%eax - cmpl %ebp,%esp - cmovel %ecx,%eax - subl %eax,%esp - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp L019cbc_loop -.align 4,0x90 -L023cbc_break: + jnz L019cbc_loop cmpl %ebp,%esp - je L024cbc_done + je L022cbc_done pxor %xmm0,%xmm0 leal (%esp),%eax -L025cbc_bzero: +L023cbc_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja L025cbc_bzero -L024cbc_done: - movl 16(%ebp),%ebp + ja L023cbc_bzero +L022cbc_done: leal 24(%ebp),%esp - jmp L026cbc_exit + jmp L024cbc_exit .align 4,0x90 -L018cbc_aligned: - leal (%esi,%ecx,1),%ebp - negl %ebp - andl $4095,%ebp +L017cbc_short: xorl %eax,%eax - cmpl $64,%ebp - movl $63,%ebp - cmovael %eax,%ebp - andl %ecx,%ebp - subl %ebp,%ecx - jz L027cbc_aligned_tail + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +L025cbc_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja L025cbc_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp L019cbc_loop +.align 4,0x90 +L018cbc_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,208 movaps (%eax),%xmm0 movaps %xmm0,-16(%edx) - testl %ebp,%ebp - jz L026cbc_exit -L027cbc_aligned_tail: - movl %ebp,%ecx - leal -24(%esp),%ebp - movl %ebp,%esp - movl %ebp,%eax - subl %ecx,%esp - andl $-16,%ebp - andl $-16,%esp - movl %eax,16(%ebp) - movl %edi,%eax - movl %ecx,%ebx - shrl $2,%ecx - leal (%esp),%edi -.byte 243,165 - movl %esp,%esi - movl %eax,%edi - movl %ebx,%ecx - jmp L019cbc_loop -L026cbc_exit: +L024cbc_exit: movl $1,%eax leal 4(%esp),%esp -L016cbc_abort: +L015cbc_abort: popl %edi popl %esi popl %ebx @@ -521,25 +425,25 @@ L_padlock_cfb_encrypt_begin: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz L028cfb_abort + jnz L026cfb_abort testl $15,%ecx - jnz L028cfb_abort - leal Lpadlock_saved_context-L029cfb_pic_point,%eax + jnz L026cfb_abort + leal Lpadlock_saved_context-L027cfb_pic_point,%eax pushfl cld call __padlock_verify_ctx -L029cfb_pic_point: +L027cfb_pic_point: leal 16(%edx),%edx xorl %eax,%eax xorl %ebx,%ebx testl $32,(%edx) - jnz L030cfb_aligned + jnz L028cfb_aligned testl $15,%edi setz %al testl $15,%esi setz %bl testl %ebx,%eax - jnz L030cfb_aligned + jnz L028cfb_aligned negl %eax movl $512,%ebx notl %eax @@ -551,15 +455,10 @@ L029cfb_pic_point: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - jmp L031cfb_loop + jmp L029cfb_loop .align 4,0x90 -L031cfb_loop: +L029cfb_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -568,13 +467,13 @@ L031cfb_loop: testl $15,%edi cmovnzl %esp,%edi testl $15,%esi - jz L032cfb_inp_aligned + jz L030cfb_inp_aligned shrl $2,%ecx .byte 243,165 subl %ebx,%edi movl %ebx,%ecx movl %edi,%esi -L032cfb_inp_aligned: +L030cfb_inp_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx @@ -584,45 +483,61 @@ L032cfb_inp_aligned: movl (%ebp),%edi movl 12(%ebp),%ebx testl $15,%edi - jz L033cfb_out_aligned + jz L031cfb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi -L033cfb_out_aligned: +L031cfb_out_aligned: movl 4(%ebp),%esi movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jnz L031cfb_loop + jnz L029cfb_loop cmpl %ebp,%esp - je L034cfb_done + je L032cfb_done pxor %xmm0,%xmm0 leal (%esp),%eax -L035cfb_bzero: +L033cfb_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja L035cfb_bzero -L034cfb_done: - movl 16(%ebp),%ebp + ja L033cfb_bzero +L032cfb_done: leal 24(%ebp),%esp - jmp L036cfb_exit + jmp L034cfb_exit .align 4,0x90 -L030cfb_aligned: +L035cfb_short: + xorl %eax,%eax + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +L036cfb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja L036cfb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp L029cfb_loop +.align 4,0x90 +L028cfb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx shrl $4,%ecx .byte 243,15,167,224 movaps (%eax),%xmm0 movaps %xmm0,-16(%edx) -L036cfb_exit: +L034cfb_exit: movl $1,%eax leal 4(%esp),%esp -L028cfb_abort: +L026cfb_abort: popl %edi popl %esi popl %ebx @@ -671,12 +586,7 @@ L038ofb_pic_point: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) jmp L040ofb_loop .align 4,0x90 L040ofb_loop: @@ -706,8 +616,8 @@ L041ofb_inp_aligned: testl $15,%edi jz L042ofb_out_aligned movl %ebx,%ecx - leal (%esp),%esi shrl $2,%ecx + leal (%esp),%esi .byte 243,165 subl %ebx,%edi L042ofb_out_aligned: @@ -728,10 +638,26 @@ L044ofb_bzero: cmpl %eax,%ebp ja L044ofb_bzero L043ofb_done: - movl 16(%ebp),%ebp leal 24(%ebp),%esp jmp L045ofb_exit .align 4,0x90 +L046ofb_short: + xorl %eax,%eax + leal -24(%esp),%ebp + subl %ecx,%eax + leal (%eax,%ebp,1),%esp + andl $-16,%esp + xorl %ebx,%ebx +L047ofb_short_copy: + movups (%esi,%ebx,1),%xmm0 + leal 16(%ebx),%ebx + cmpl %ebx,%ecx + movaps %xmm0,-16(%esp,%ebx,1) + ja L047ofb_short_copy + movl %esp,%esi + movl %ecx,%ebx + jmp L040ofb_loop +.align 4,0x90 L039ofb_aligned: leal -16(%edx),%eax leal 16(%edx),%ebx @@ -761,14 +687,14 @@ L_padlock_ctr32_encrypt_begin: movl 28(%esp),%edx movl 32(%esp),%ecx testl $15,%edx - jnz L046ctr32_abort + jnz L048ctr32_abort testl $15,%ecx - jnz L046ctr32_abort - leal Lpadlock_saved_context-L047ctr32_pic_point,%eax + jnz L048ctr32_abort + leal Lpadlock_saved_context-L049ctr32_pic_point,%eax pushfl cld call __padlock_verify_ctx -L047ctr32_pic_point: +L049ctr32_pic_point: leal 16(%edx),%edx xorl %eax,%eax movq -16(%edx),%mm0 @@ -782,15 +708,10 @@ L047ctr32_pic_point: negl %eax andl $511,%ebx leal (%eax,%ebp,1),%esp - movl $512,%eax - cmovzl %eax,%ebx - movl %ebp,%eax - andl $-16,%ebp andl $-16,%esp - movl %eax,16(%ebp) - jmp L048ctr32_loop + jmp L050ctr32_loop .align 4,0x90 -L048ctr32_loop: +L050ctr32_loop: movl %edi,(%ebp) movl %esi,4(%ebp) movl %ecx,8(%ebp) @@ -799,7 +720,7 @@ L048ctr32_loop: movl -4(%edx),%ecx xorl %edi,%edi movl -8(%edx),%eax -L049ctr32_prepare: +L051ctr32_prepare: movl %ecx,12(%esp,%edi,1) bswap %ecx movq %mm0,(%esp,%edi,1) @@ -808,7 +729,7 @@ L049ctr32_prepare: bswap %ecx leal 16(%edi),%edi cmpl %ebx,%edi - jb L049ctr32_prepare + jb L051ctr32_prepare movl %ecx,-4(%edx) leal (%esp),%esi leal (%esp),%edi @@ -821,33 +742,32 @@ L049ctr32_prepare: movl 12(%ebp),%ebx movl 4(%ebp),%esi xorl %ecx,%ecx -L050ctr32_xor: +L052ctr32_xor: movups (%esi,%ecx,1),%xmm1 leal 16(%ecx),%ecx pxor -16(%esp,%ecx,1),%xmm1 movups %xmm1,-16(%edi,%ecx,1) cmpl %ebx,%ecx - jb L050ctr32_xor + jb L052ctr32_xor movl 8(%ebp),%ecx addl %ebx,%edi addl %ebx,%esi subl %ebx,%ecx movl $512,%ebx - jnz L048ctr32_loop + jnz L050ctr32_loop pxor %xmm0,%xmm0 leal (%esp),%eax -L051ctr32_bzero: +L053ctr32_bzero: movaps %xmm0,(%eax) leal 16(%eax),%eax cmpl %eax,%ebp - ja L051ctr32_bzero -L052ctr32_done: - movl 16(%ebp),%ebp + ja L053ctr32_bzero +L054ctr32_done: leal 24(%ebp),%esp movl $1,%eax leal 4(%esp),%esp emms -L046ctr32_abort: +L048ctr32_abort: popl %edi popl %esi popl %ebx @@ -869,10 +789,10 @@ __win32_segv_handler: movl 4(%esp),%edx movl 12(%esp),%ecx cmpl $3221225477,(%edx) - jne L053ret + jne L055ret addl $4,184(%ecx) movl $0,%eax -L053ret: +L055ret: ret .globl _padlock_sha1_oneshot .align 4 @@ -1033,4 +953,3 @@ L_padlock_sha512_blocks_begin: Lpadlock_saved_context: .long 0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/e_padlock-x86_64.s b/lib/accelerated/x86/macosx/e_padlock-x86_64.s index d34e3bc57b..00c2bdaac0 100644 --- a/lib/accelerated/x86/macosx/e_padlock-x86_64.s +++ b/lib/accelerated/x86/macosx/e_padlock-x86_64.s @@ -127,7 +127,7 @@ _padlock_aes_block: movq $1,%rcx leaq 32(%rdx),%rbx leaq 16(%rdx),%rdx -.byte 0xf3,0x0f,0xa7,0xc8 +.byte 0xf3,0x0f,0xa7,0xc8 movq %r8,%rbx .byte 0xf3,0xc3 @@ -137,7 +137,7 @@ _padlock_aes_block: .p2align 4 _padlock_xstore: movl %esi,%edx -.byte 0x0f,0xa7,0xc0 +.byte 0x0f,0xa7,0xc0 .byte 0xf3,0xc3 @@ -154,7 +154,7 @@ _padlock_sha1_oneshot: movq %rsp,%rdi movl %eax,16(%rsp) xorq %rax,%rax -.byte 0xf3,0x0f,0xa6,0xc8 +.byte 0xf3,0x0f,0xa6,0xc8 movaps (%rsp),%xmm0 movl 16(%rsp),%eax addq $128+8,%rsp @@ -176,7 +176,7 @@ _padlock_sha1_blocks: movq %rsp,%rdi movl %eax,16(%rsp) movq $-1,%rax -.byte 0xf3,0x0f,0xa6,0xc8 +.byte 0xf3,0x0f,0xa6,0xc8 movaps (%rsp),%xmm0 movl 16(%rsp),%eax addq $128+8,%rsp @@ -198,7 +198,7 @@ _padlock_sha256_oneshot: movq %rsp,%rdi movaps %xmm1,16(%rsp) xorq %rax,%rax -.byte 0xf3,0x0f,0xa6,0xd0 +.byte 0xf3,0x0f,0xa6,0xd0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 addq $128+8,%rsp @@ -220,7 +220,7 @@ _padlock_sha256_blocks: movq %rsp,%rdi movaps %xmm1,16(%rsp) movq $-1,%rax -.byte 0xf3,0x0f,0xa6,0xd0 +.byte 0xf3,0x0f,0xa6,0xd0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 addq $128+8,%rsp @@ -245,7 +245,7 @@ _padlock_sha512_blocks: movaps %xmm1,16(%rsp) movaps %xmm2,32(%rsp) movaps %xmm3,48(%rsp) -.byte 0xf3,0x0f,0xa6,0xe0 +.byte 0xf3,0x0f,0xa6,0xe0 movaps (%rsp),%xmm0 movaps 16(%rsp),%xmm1 movaps 32(%rsp),%xmm2 @@ -276,6 +276,8 @@ _padlock_ecb_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $128,%rcx + jbe L$ecb_short testl $32,(%rdx) jnz L$ecb_aligned testq $15,%rdi @@ -295,21 +297,6 @@ _padlock_ecb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx - cmpq %rbx,%rcx - ja L$ecb_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $128,%rax - movq $-128,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz L$ecb_unaligned_tail jmp L$ecb_loop .p2align 4 L$ecb_loop: @@ -325,7 +312,7 @@ L$ecb_loop: testq $15,%rsi jz L$ecb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -333,15 +320,15 @@ L$ecb_inp_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,200 +.byte 0xf3,0x0f,0xa7,200 movq %r8,%rdi movq %r11,%rbx testq $15,%rdi jz L$ecb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi L$ecb_out_aligned: movq %r9,%rsi @@ -350,26 +337,9 @@ L$ecb_out_aligned: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz L$ecb_break - cmpq %rbx,%rcx - jae L$ecb_loop -L$ecb_unaligned_tail: - xorl %eax,%eax + jnz L$ecb_loop + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp L$ecb_loop -.p2align 4 -L$ecb_break: - cmpq %rbp,%rsp je L$ecb_done pxor %xmm0,%xmm0 @@ -383,39 +353,26 @@ L$ecb_bzero: L$ecb_done: leaq (%rbp),%rsp jmp L$ecb_exit - +.p2align 4 +L$ecb_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +L$ecb_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja L$ecb_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp L$ecb_loop .p2align 4 L$ecb_aligned: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $128,%rbp - movq $128-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz L$ecb_aligned_tail leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,200 - testq %rbp,%rbp - jz L$ecb_exit - -L$ecb_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp L$ecb_loop +.byte 0xf3,0x0f,0xa7,200 L$ecb_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -443,6 +400,8 @@ _padlock_cbc_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $64,%rcx + jbe L$cbc_short testl $32,(%rdx) jnz L$cbc_aligned testq $15,%rdi @@ -462,21 +421,6 @@ _padlock_cbc_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx - cmpq %rbx,%rcx - ja L$cbc_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $64,%rax - movq $-64,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz L$cbc_unaligned_tail jmp L$cbc_loop .p2align 4 L$cbc_loop: @@ -492,7 +436,7 @@ L$cbc_loop: testq $15,%rsi jz L$cbc_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -500,7 +444,7 @@ L$cbc_inp_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,208 +.byte 0xf3,0x0f,0xa7,208 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -508,9 +452,9 @@ L$cbc_inp_aligned: testq $15,%rdi jz L$cbc_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi L$cbc_out_aligned: movq %r9,%rsi @@ -519,26 +463,9 @@ L$cbc_out_aligned: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz L$cbc_break - cmpq %rbx,%rcx - jae L$cbc_loop -L$cbc_unaligned_tail: - xorl %eax,%eax + jnz L$cbc_loop + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp L$cbc_loop -.p2align 4 -L$cbc_break: - cmpq %rbp,%rsp je L$cbc_done pxor %xmm0,%xmm0 @@ -552,41 +479,28 @@ L$cbc_bzero: L$cbc_done: leaq (%rbp),%rsp jmp L$cbc_exit - +.p2align 4 +L$cbc_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +L$cbc_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja L$cbc_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp L$cbc_loop .p2align 4 L$cbc_aligned: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $64,%rbp - movq $64-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz L$cbc_aligned_tail leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,208 +.byte 0xf3,0x0f,0xa7,208 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) - testq %rbp,%rbp - jz L$cbc_exit - -L$cbc_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp L$cbc_loop L$cbc_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -633,8 +547,6 @@ _padlock_cfb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx jmp L$cfb_loop .p2align 4 L$cfb_loop: @@ -650,7 +562,7 @@ L$cfb_loop: testq $15,%rsi jz L$cfb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -658,7 +570,7 @@ L$cfb_inp_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,224 +.byte 0xf3,0x0f,0xa7,224 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -666,9 +578,9 @@ L$cfb_inp_aligned: testq $15,%rdi jz L$cfb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi L$cfb_out_aligned: movq %r9,%rsi @@ -678,7 +590,8 @@ L$cfb_out_aligned: subq %rbx,%rcx movq $512,%rbx jnz L$cfb_loop - cmpq %rbp,%rsp + + cmpq %rsp,%rbp je L$cfb_done pxor %xmm0,%xmm0 @@ -692,13 +605,12 @@ L$cfb_bzero: L$cfb_done: leaq (%rbp),%rsp jmp L$cfb_exit - .p2align 4 L$cfb_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,224 +.byte 0xf3,0x0f,0xa7,224 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) L$cfb_exit: @@ -747,8 +659,6 @@ _padlock_ofb_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx jmp L$ofb_loop .p2align 4 L$ofb_loop: @@ -764,7 +674,7 @@ L$ofb_loop: testq $15,%rsi jz L$ofb_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -772,7 +682,7 @@ L$ofb_inp_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,232 +.byte 0xf3,0x0f,0xa7,232 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) movq %r8,%rdi @@ -780,9 +690,9 @@ L$ofb_inp_aligned: testq $15,%rdi jz L$ofb_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi L$ofb_out_aligned: movq %r9,%rsi @@ -792,7 +702,8 @@ L$ofb_out_aligned: subq %rbx,%rcx movq $512,%rbx jnz L$ofb_loop - cmpq %rbp,%rsp + + cmpq %rsp,%rbp je L$ofb_done pxor %xmm0,%xmm0 @@ -806,13 +717,12 @@ L$ofb_bzero: L$ofb_done: leaq (%rbp),%rsp jmp L$ofb_exit - .p2align 4 L$ofb_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,232 +.byte 0xf3,0x0f,0xa7,232 movdqa (%rax),%xmm0 movdqa %xmm0,-16(%rdx) L$ofb_exit: @@ -842,6 +752,8 @@ _padlock_ctr32_encrypt: leaq 16(%rdx),%rdx xorl %eax,%eax xorl %ebx,%ebx + cmpq $64,%rcx + jbe L$ctr32_short testl $32,(%rdx) jnz L$ctr32_aligned testq $15,%rdi @@ -861,32 +773,15 @@ _padlock_ctr32_encrypt: negq %rax andq $512-1,%rbx leaq (%rax,%rbp,1),%rsp - movq $512,%rax - cmovzq %rax,%rbx L$ctr32_reenter: movl -4(%rdx),%eax bswapl %eax negl %eax andl $31,%eax - movq $512,%rbx + jz L$ctr32_loop shll $4,%eax - cmovzq %rbx,%rax cmpq %rax,%rcx cmovaq %rax,%rbx - cmovbeq %rcx,%rbx - cmpq %rbx,%rcx - ja L$ctr32_loop - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $32,%rax - movq $-32,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx - jz L$ctr32_unaligned_tail jmp L$ctr32_loop .p2align 4 L$ctr32_loop: @@ -902,7 +797,7 @@ L$ctr32_loop: testq $15,%rsi jz L$ctr32_inp_aligned shrq $3,%rcx -.byte 0xf3,0x48,0xa5 +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi movq %rbx,%rcx movq %rdi,%rsi @@ -910,23 +805,23 @@ L$ctr32_inp_aligned: leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 +.byte 0xf3,0x0f,0xa7,216 movl -4(%rdx),%eax testl $4294901760,%eax - jnz L$ctr32_no_carry + jnz L$ctr32_no_corr bswapl %eax addl $65536,%eax bswapl %eax movl %eax,-4(%rdx) -L$ctr32_no_carry: +L$ctr32_no_corr: movq %r8,%rdi movq %r11,%rbx testq $15,%rdi jz L$ctr32_out_aligned movq %rbx,%rcx - leaq (%rsp),%rsi shrq $3,%rcx -.byte 0xf3,0x48,0xa5 + leaq (%rsp),%rsi +.byte 0xf3,0x48,0xa5 subq %rbx,%rdi L$ctr32_out_aligned: movq %r9,%rsi @@ -935,38 +830,9 @@ L$ctr32_out_aligned: addq %rbx,%rsi subq %rbx,%rcx movq $512,%rbx - jz L$ctr32_break - cmpq %rbx,%rcx - jae L$ctr32_loop - movq %rcx,%rbx - movq %rsi,%rax - cmpq %rsp,%rbp - cmoveq %rdi,%rax - addq %rcx,%rax - negq %rax - andq $4095,%rax - cmpq $32,%rax - movq $-32,%rax - cmovaeq %rbx,%rax - andq %rax,%rbx jnz L$ctr32_loop -L$ctr32_unaligned_tail: - xorl %eax,%eax + cmpq %rsp,%rbp - cmoveq %rcx,%rax - movq %rdi,%r8 - movq %rcx,%rbx - subq %rax,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - movq %rsp,%rsi - movq %r8,%rdi - movq %rbx,%rcx - jmp L$ctr32_loop -.p2align 4 -L$ctr32_break: - cmpq %rbp,%rsp je L$ctr32_done pxor %xmm0,%xmm0 @@ -980,75 +846,56 @@ L$ctr32_bzero: L$ctr32_done: leaq (%rbp),%rsp jmp L$ctr32_exit - +.p2align 4 +L$ctr32_short: + movq %rsp,%rbp + subq %rcx,%rsp + xorq %rbx,%rbx +L$ctr32_short_copy: + movups (%rsi,%rbx,1),%xmm0 + leaq 16(%rbx),%rbx + cmpq %rbx,%rcx + movaps %xmm0,-16(%rsp,%rbx,1) + ja L$ctr32_short_copy + movq %rsp,%rsi + movq %rcx,%rbx + jmp L$ctr32_reenter .p2align 4 L$ctr32_aligned: movl -4(%rdx),%eax + movq $1048576,%rbx bswapl %eax + cmpq %rcx,%rbx + cmovaq %rcx,%rbx negl %eax andl $65535,%eax - movq $1048576,%rbx + jz L$ctr32_aligned_loop shll $4,%eax - cmovzq %rbx,%rax cmpq %rax,%rcx cmovaq %rax,%rbx - cmovbeq %rcx,%rbx - jbe L$ctr32_aligned_skip - + jmp L$ctr32_aligned_loop +.p2align 4 L$ctr32_aligned_loop: + cmpq %rcx,%rbx + cmovaq %rcx,%rbx movq %rcx,%r10 movq %rbx,%rcx movq %rbx,%r11 - leaq -16(%rdx),%rax leaq 16(%rdx),%rbx shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 - +.byte 0xf3,0x0f,0xa7,216 movl -4(%rdx),%eax bswapl %eax addl $65536,%eax bswapl %eax movl %eax,-4(%rdx) + movq %r11,%rbx movq %r10,%rcx - subq %r11,%rcx + subq %rbx,%rcx movq $1048576,%rbx - jz L$ctr32_exit - cmpq %rbx,%rcx - jae L$ctr32_aligned_loop - -L$ctr32_aligned_skip: - leaq (%rsi,%rcx,1),%rbp - negq %rbp - andq $4095,%rbp - xorl %eax,%eax - cmpq $32,%rbp - movq $32-1,%rbp - cmovaeq %rax,%rbp - andq %rcx,%rbp - subq %rbp,%rcx - jz L$ctr32_aligned_tail - leaq -16(%rdx),%rax - leaq 16(%rdx),%rbx - shrq $4,%rcx -.byte 0xf3,0x0f,0xa7,216 - testq %rbp,%rbp - jz L$ctr32_exit - -L$ctr32_aligned_tail: - movq %rdi,%r8 - movq %rbp,%rbx - movq %rbp,%rcx - leaq (%rsp),%rbp - subq %rcx,%rsp - shrq $3,%rcx - leaq (%rsp),%rdi -.byte 0xf3,0x48,0xa5 - leaq (%r8),%rdi - leaq (%rsp),%rsi - movq %rbx,%rcx - jmp L$ctr32_loop + jnz L$ctr32_aligned_loop L$ctr32_exit: movl $1,%eax leaq 8(%rsp),%rsp @@ -1064,4 +911,3 @@ L$ctr32_abort: L$padlock_saved_context: .quad 0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/ghash-x86_64.s b/lib/accelerated/x86/macosx/ghash-x86_64.s index a400ade9d7..8aa7ffc04f 100644 --- a/lib/accelerated/x86/macosx/ghash-x86_64.s +++ b/lib/accelerated/x86/macosx/ghash-x86_64.s @@ -697,7 +697,6 @@ L$ghash_epilogue: .p2align 4 _gcm_init_clmul: -L$_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -716,15 +715,15 @@ L$_init_clmul: pxor %xmm5,%xmm2 - pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 - pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -734,134 +733,44 @@ L$_init_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - pshufd $78,%xmm2,%xmm3 - pshufd $78,%xmm0,%xmm4 - pxor %xmm2,%xmm3 - movdqu %xmm2,0(%rdi) - pxor %xmm0,%xmm4 - movdqu %xmm0,16(%rdi) -.byte 102,15,58,15,227,8 - movdqu %xmm4,32(%rdi) - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,15,58,68,194,0 -.byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - movdqa %xmm0,%xmm5 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,15,58,68,194,0 -.byte 102,15,58,68,202,17 -.byte 102,15,58,68,222,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 + pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - pshufd $78,%xmm5,%xmm3 - pshufd $78,%xmm0,%xmm4 - pxor %xmm5,%xmm3 - movdqu %xmm5,48(%rdi) - pxor %xmm0,%xmm4 - movdqu %xmm0,64(%rdi) -.byte 102,15,58,15,227,8 - movdqu %xmm4,80(%rdi) + movdqu %xmm2,(%rdi) + movdqu %xmm0,16(%rdi) .byte 0xf3,0xc3 .globl _gcm_gmult_clmul .p2align 4 _gcm_gmult_clmul: -L$_gmult_clmul: movdqu (%rdi),%xmm0 movdqa L$bswap_mask(%rip),%xmm5 movdqu (%rsi),%xmm2 - movdqu 32(%rsi),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 @@ -874,358 +783,186 @@ L$_gmult_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .globl _gcm_ghash_clmul -.p2align 5 +.p2align 4 _gcm_ghash_clmul: -L$_ghash_clmul: movdqa L$bswap_mask(%rip),%xmm5 - movq $11547335547999543296,%rax movdqu (%rdi),%xmm0 movdqu (%rsi),%xmm2 - movdqu 32(%rsi),%xmm10 .byte 102,15,56,0,197 subq $16,%rcx jz L$odd_tail - movdqu 16(%rsi),%xmm9 - cmpq $48,%rcx - jb L$skip4x + movdqu 16(%rsi),%xmm8 - subq $48,%rcx - movdqu 48(%rsi),%xmm14 - movdqu 64(%rsi),%xmm15 - movdqu 48(%rdx),%xmm6 - movdqu 32(%rdx),%xmm11 -.byte 102,15,56,0,245 -.byte 102,68,15,56,0,221 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 - pxor %xmm6,%xmm7 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,250,0 - - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,217,0 -.byte 102,69,15,58,68,233,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,16 - xorps %xmm13,%xmm8 - movups 80(%rsi),%xmm10 - xorps %xmm12,%xmm7 - - movdqu 16(%rdx),%xmm11 - movdqu 0(%rdx),%xmm3 -.byte 102,68,15,56,0,221 + movdqu (%rdx),%xmm3 + movdqu 16(%rdx),%xmm6 .byte 102,15,56,0,221 - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 - pxor %xmm3,%xmm0 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,222,0 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - - leaq 64(%rdx),%rdx - subq $64,%rcx - jc L$tail4x - - jmp L$mod4_loop -.p2align 5 -L$mod4_loop: -.byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 - movdqu 48(%rdx),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 - movdqu 32(%rdx),%xmm6 - movdqa %xmm11,%xmm13 - pshufd $78,%xmm11,%xmm12 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 - pxor %xmm11,%xmm12 .byte 102,15,56,0,245 - movups 32(%rsi),%xmm10 -.byte 102,68,15,58,68,218,0 - xorps %xmm7,%xmm3 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 - pxor %xmm0,%xmm3 - pxor %xmm6,%xmm7 - pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 - pslldq $8,%xmm3 -.byte 102,68,15,58,68,234,17 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - movdqa L$7_mask(%rip),%xmm3 - pxor %xmm4,%xmm1 -.byte 102,72,15,110,224 - - pand %xmm0,%xmm3 -.byte 102,15,56,0,227 -.byte 102,69,15,58,68,226,0 - pxor %xmm0,%xmm4 - psllq $57,%xmm4 - movdqa %xmm4,%xmm3 - pslldq $8,%xmm4 -.byte 102,65,15,58,68,241,0 psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - movdqu 0(%rdx),%xmm3 - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 -.byte 102,69,15,58,68,193,17 - xorps %xmm11,%xmm6 - movdqu 16(%rdx),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,250,16 - xorps %xmm13,%xmm8 - movups 80(%rsi),%xmm10 -.byte 102,15,56,0,221 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - - movdqa %xmm11,%xmm13 - pxor %xmm12,%xmm7 - pshufd $78,%xmm11,%xmm12 - pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,222,0 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - psrlq $1,%xmm0 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 - pxor %xmm1,%xmm0 - -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 - leaq 64(%rdx),%rdx - subq $64,%rcx - jnc L$mod4_loop - -L$tail4x: -.byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 -.byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 - pxor %xmm0,%xmm1 - pxor %xmm7,%xmm3 + leaq 32(%rdx),%rdx + subq $32,%rcx + jbe L$even_tail +L$mod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 - pxor %xmm0,%xmm1 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 - psllq $57,%xmm0 - movdqa %xmm0,%xmm3 - pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - - - movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 - psrlq $1,%xmm0 - pxor %xmm1,%xmm0 - addq $64,%rcx - jz L$done - movdqu 32(%rsi),%xmm10 - subq $16,%rcx - jz L$odd_tail -L$skip4x: - - - - - movdqu (%rdx),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + movdqu 16(%rdx),%xmm6 .byte 102,15,56,0,221 .byte 102,15,56,0,245 - pxor %xmm3,%xmm0 - - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm3 - pxor %xmm6,%xmm3 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,218,0 - - leaq 32(%rdx),%rdx - subq $32,%rcx - jbe L$even_tail - jmp L$mod_loop - -.p2align 5 -L$mod_loop: - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm4 - pxor %xmm0,%xmm4 - -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 - - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 - movdqu (%rdx),%xmm8 -.byte 102,68,15,56,0,197 - movdqu 16(%rdx),%xmm6 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - pxor %xmm8,%xmm1 - pxor %xmm3,%xmm4 -.byte 102,15,56,0,245 - movdqa %xmm4,%xmm3 - psrldq $8,%xmm3 - pslldq $8,%xmm4 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - movdqa %xmm6,%xmm8 - - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 -.byte 102,15,58,68,242,0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - pshufd $78,%xmm8,%xmm3 - pxor %xmm8,%xmm3 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 -.byte 102,68,15,58,68,194,17 +.byte 102,15,58,68,250,17 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 -.byte 102,65,15,58,68,218,0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 leaq 32(%rdx),%rdx subq $32,%rcx ja L$mod_loop L$even_tail: - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm4 - pxor %xmm0,%xmm4 - -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 - - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 - pxor %xmm3,%xmm4 - movdqa %xmm4,%xmm3 + + movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 testq %rcx,%rcx jnz L$done @@ -1235,10 +972,12 @@ L$odd_tail: pxor %xmm3,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,65,15,58,68,218,0 +.byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -1248,60 +987,38 @@ L$odd_tail: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $5,%xmm0 - pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm4 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 movdqa %xmm0,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 L$done: .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 - -.globl _gcm_init_avx - -.p2align 5 -_gcm_init_avx: - jmp L$_init_clmul - -.globl _gcm_gmult_avx - -.p2align 5 -_gcm_gmult_avx: - jmp L$_gmult_clmul - -.globl _gcm_ghash_avx - -.p2align 5 -_gcm_ghash_avx: - jmp L$_ghash_clmul +L$SEH_end_gcm_ghash_clmul: .p2align 6 L$bswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 L$0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 -L$7_mask: -.long 7,0,7,0 -L$7_mask_poly: -.long 7,0,450,0 .p2align 6 L$rem_4bit: @@ -1347,4 +1064,3 @@ L$rem_8bit: .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86.s index f03312e8c3..8e01010ce3 100644 --- a/lib/accelerated/x86/macosx/sha1-ssse3-x86.s +++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86.s @@ -1416,4 +1416,3 @@ L000loop: .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s index 2982461347..d7f1e40b18 100644 --- a/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s +++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s @@ -46,7 +46,6 @@ _sha1_block_data_order: movl __gnutls_x86_cpuid_s+0(%rip),%r9d movl __gnutls_x86_cpuid_s+4(%rip),%r8d - movl __gnutls_x86_cpuid_s+8(%rip),%r10d testl $512,%r8d jz L$ialu jmp _ssse3_shortcut @@ -1342,7 +1341,7 @@ _ssse3_shortcut: shlq $6,%r10 addq %r9,%r10 - leaq K_XX_XX+64(%rip),%r11 + leaq K_XX_XX(%rip),%r11 movl 0(%r8),%eax movl 4(%r8),%ebx @@ -1350,12 +1349,9 @@ _ssse3_shortcut: movl 12(%r8),%edx movl %ebx,%esi movl 16(%r8),%ebp - movl %ecx,%edi - xorl %edx,%edi - andl %edi,%esi movdqa 64(%r11),%xmm6 - movdqa -64(%r11),%xmm9 + movdqa 0(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -1378,881 +1374,903 @@ _ssse3_shortcut: .p2align 4 L$oop_ssse3: movdqa %xmm1,%xmm4 - rorl $2,%ebx - xorl %edx,%esi + addl 0(%rsp),%ebp + xorl %edx,%ecx movdqa %xmm3,%xmm8 .byte 102,15,58,15,224,8 movl %eax,%edi - addl 0(%rsp),%ebp - paddd %xmm3,%xmm9 - xorl %ecx,%ebx roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx psrldq $4,%xmm8 - addl %esi,%ebp - andl %ebx,%edi - pxor %xmm0,%xmm4 - xorl %ecx,%ebx + xorl %edx,%esi addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp pxor %xmm2,%xmm8 - rorl $7,%eax - xorl %ecx,%edi - movl %ebp,%esi addl 4(%rsp),%edx - pxor %xmm8,%xmm4 - xorl %ebx,%eax + xorl %ecx,%ebx + movl %ebp,%esi roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx movdqa %xmm9,48(%rsp) - addl %edi,%edx - andl %eax,%esi + xorl %ecx,%edi + addl %ebp,%edx movdqa %xmm4,%xmm10 movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx xorl %ebx,%eax - addl %ebp,%edx - rorl $7,%ebp - xorl %ebx,%esi pslldq $12,%xmm10 paddd %xmm4,%xmm4 movl %edx,%edi - addl 8(%rsp),%ecx - xorl %eax,%ebp roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax psrld $31,%xmm8 - addl %esi,%ecx - andl %ebp,%edi - movdqa %xmm10,%xmm9 - xorl %eax,%ebp + xorl %ebx,%esi addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx psrld $30,%xmm10 por %xmm8,%xmm4 - rorl $7,%edx - xorl %eax,%edi - movl %ecx,%esi addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx pslld $2,%xmm9 pxor %xmm10,%xmm4 - xorl %ebp,%edx - roll $5,%ecx - movdqa -64(%r11),%xmm10 - addl %edi,%ebx - andl %edx,%esi - pxor %xmm9,%xmm4 - xorl %ebp,%edx + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx movdqa %xmm2,%xmm5 - rorl $7,%ecx - xorl %ebp,%esi + addl 16(%rsp),%eax + xorl %ebp,%edx movdqa %xmm4,%xmm9 .byte 102,15,58,15,233,8 movl %ebx,%edi - addl 16(%rsp),%eax - paddd %xmm4,%xmm10 - xorl %edx,%ecx roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx psrldq $4,%xmm9 - addl %esi,%eax - andl %ecx,%edi - pxor %xmm1,%xmm5 - xorl %edx,%ecx + xorl %ebp,%esi addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax pxor %xmm3,%xmm9 - rorl $7,%ebx - xorl %edx,%edi - movl %eax,%esi addl 20(%rsp),%ebp - pxor %xmm9,%xmm5 - xorl %ecx,%ebx + xorl %edx,%ecx + movl %eax,%esi roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx movdqa %xmm10,0(%rsp) - addl %edi,%ebp - andl %ebx,%esi + xorl %edx,%edi + addl %eax,%ebp movdqa %xmm5,%xmm8 movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx xorl %ecx,%ebx - addl %eax,%ebp - rorl $7,%eax - xorl %ecx,%esi pslldq $12,%xmm8 paddd %xmm5,%xmm5 movl %ebp,%edi - addl 24(%rsp),%edx - xorl %ebx,%eax roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx psrld $31,%xmm9 - addl %esi,%edx - andl %eax,%edi - movdqa %xmm8,%xmm10 - xorl %ebx,%eax + xorl %ecx,%esi addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx psrld $30,%xmm8 por %xmm9,%xmm5 - rorl $7,%ebp - xorl %ebx,%edi - movl %edx,%esi addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx pslld $2,%xmm10 pxor %xmm8,%xmm5 - xorl %eax,%ebp - roll $5,%edx - movdqa -32(%r11),%xmm8 - addl %edi,%ecx - andl %ebp,%esi - pxor %xmm10,%xmm5 - xorl %eax,%ebp + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx movdqa %xmm3,%xmm6 - rorl $7,%edx - xorl %eax,%esi + addl 32(%rsp),%ebx + xorl %eax,%ebp movdqa %xmm5,%xmm10 .byte 102,15,58,15,242,8 movl %ecx,%edi - addl 32(%rsp),%ebx - paddd %xmm5,%xmm8 - xorl %ebp,%edx roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp psrldq $4,%xmm10 - addl %esi,%ebx - andl %edx,%edi - pxor %xmm2,%xmm6 - xorl %ebp,%edx + xorl %eax,%esi addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx pxor %xmm4,%xmm10 - rorl $7,%ecx - xorl %ebp,%edi - movl %ebx,%esi addl 36(%rsp),%eax - pxor %xmm10,%xmm6 - xorl %edx,%ecx + xorl %ebp,%edx + movl %ebx,%esi roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx movdqa %xmm8,16(%rsp) - addl %edi,%eax - andl %ecx,%esi + xorl %ebp,%edi + addl %ebx,%eax movdqa %xmm6,%xmm9 movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp xorl %edx,%ecx - addl %ebx,%eax - rorl $7,%ebx - xorl %edx,%esi pslldq $12,%xmm9 paddd %xmm6,%xmm6 movl %eax,%edi - addl 40(%rsp),%ebp - xorl %ecx,%ebx roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx psrld $31,%xmm10 - addl %esi,%ebp - andl %ebx,%edi - movdqa %xmm9,%xmm8 - xorl %ecx,%ebx + xorl %edx,%esi addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp psrld $30,%xmm9 por %xmm10,%xmm6 - rorl $7,%eax - xorl %ecx,%edi - movl %ebp,%esi addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp pslld $2,%xmm8 pxor %xmm9,%xmm6 - xorl %ebx,%eax - roll $5,%ebp - movdqa -32(%r11),%xmm9 - addl %edi,%edx - andl %eax,%esi - pxor %xmm8,%xmm6 - xorl %ebx,%eax + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx movdqa %xmm4,%xmm7 - rorl $7,%ebp - xorl %ebx,%esi + addl 48(%rsp),%ecx + xorl %ebx,%eax movdqa %xmm6,%xmm8 .byte 102,15,58,15,251,8 movl %edx,%edi - addl 48(%rsp),%ecx - paddd %xmm6,%xmm9 - xorl %eax,%ebp roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax psrldq $4,%xmm8 - addl %esi,%ecx - andl %ebp,%edi - pxor %xmm3,%xmm7 - xorl %eax,%ebp + xorl %ebx,%esi addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx pxor %xmm5,%xmm8 - rorl $7,%edx - xorl %eax,%edi - movl %ecx,%esi addl 52(%rsp),%ebx - pxor %xmm8,%xmm7 - xorl %ebp,%edx + xorl %eax,%ebp + movl %ecx,%esi roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp movdqa %xmm9,32(%rsp) - addl %edi,%ebx - andl %edx,%esi + xorl %eax,%edi + addl %ecx,%ebx movdqa %xmm7,%xmm10 movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax xorl %ebp,%edx - addl %ecx,%ebx - rorl $7,%ecx - xorl %ebp,%esi pslldq $12,%xmm10 paddd %xmm7,%xmm7 movl %ebx,%edi - addl 56(%rsp),%eax - xorl %edx,%ecx roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx psrld $31,%xmm8 - addl %esi,%eax - andl %ecx,%edi - movdqa %xmm10,%xmm9 - xorl %edx,%ecx + xorl %ebp,%esi addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax psrld $30,%xmm10 por %xmm8,%xmm7 - rorl $7,%ebx - xorl %edx,%edi - movl %eax,%esi addl 60(%rsp),%ebp - pslld $2,%xmm9 - pxor %xmm10,%xmm7 - xorl %ecx,%ebx - roll $5,%eax - movdqa -32(%r11),%xmm10 - addl %edi,%ebp - andl %ebx,%esi - pxor %xmm9,%xmm7 - xorl %ecx,%ebx + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp movdqa %xmm7,%xmm9 - rorl $7,%eax + addl 0(%rsp),%edx pxor %xmm4,%xmm0 .byte 102,68,15,58,15,206,8 - xorl %ecx,%esi + xorl %ecx,%ebx movl %ebp,%edi - addl 0(%rsp),%edx - pxor %xmm1,%xmm0 - xorl %ebx,%eax roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx movdqa %xmm10,%xmm8 paddd %xmm7,%xmm10 - addl %esi,%edx - andl %eax,%edi + xorl %ecx,%esi + addl %ebp,%edx pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx xorl %ebx,%eax - addl %ebp,%edx - rorl $7,%ebp - xorl %ebx,%edi movdqa %xmm0,%xmm9 movdqa %xmm10,48(%rsp) movl %edx,%esi - addl 4(%rsp),%ecx - xorl %eax,%ebp roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax pslld $2,%xmm0 - addl %edi,%ecx - andl %ebp,%esi + xorl %ebx,%edi + addl %edx,%ecx psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edx - xorl %eax,%esi movl %ecx,%edi - addl 8(%rsp),%ebx - por %xmm9,%xmm0 - xorl %ebp,%edx roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp movdqa %xmm0,%xmm10 - addl %esi,%ebx - andl %edx,%edi - xorl %ebp,%edx + xorl %eax,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 12(%rsp),%eax - xorl %ebp,%edi + xorl %ebp,%edx movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 16(%rsp),%ebp pxor %xmm5,%xmm1 .byte 102,68,15,58,15,215,8 - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax pxor %xmm2,%xmm1 - addl %esi,%ebp - xorl %ecx,%edi + xorl %ecx,%esi + addl %eax,%ebp movdqa %xmm8,%xmm9 paddd %xmm0,%xmm8 rorl $7,%ebx - addl %eax,%ebp + addl %esi,%ebp pxor %xmm10,%xmm1 addl 20(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm1,%xmm10 movdqa %xmm8,0(%rsp) - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm1 addl 24(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi psrld $30,%xmm10 movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx por %xmm10,%xmm1 addl 28(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movdqa %xmm1,%xmm8 movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 32(%rsp),%eax pxor %xmm6,%xmm2 .byte 102,68,15,58,15,192,8 - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx pxor %xmm3,%xmm2 - addl %esi,%eax - xorl %edx,%edi - movdqa 0(%r11),%xmm10 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 paddd %xmm1,%xmm9 rorl $7,%ecx - addl %ebx,%eax + addl %esi,%eax pxor %xmm8,%xmm2 addl 36(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm8 movdqa %xmm9,16(%rsp) - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp pslld $2,%xmm2 addl 40(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi psrld $30,%xmm8 movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx por %xmm8,%xmm2 addl 44(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movdqa %xmm2,%xmm9 movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 48(%rsp),%ebx pxor %xmm7,%xmm3 .byte 102,68,15,58,15,201,8 - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx pxor %xmm4,%xmm3 - addl %esi,%ebx - xorl %ebp,%edi + xorl %ebp,%esi + addl %ecx,%ebx movdqa %xmm10,%xmm8 paddd %xmm2,%xmm10 rorl $7,%edx - addl %ecx,%ebx + addl %esi,%ebx pxor %xmm9,%xmm3 addl 52(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm9 movdqa %xmm10,32(%rsp) - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax pslld $2,%xmm3 addl 56(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi psrld $30,%xmm9 movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp por %xmm9,%xmm3 addl 60(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movdqa %xmm3,%xmm10 movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 0(%rsp),%ecx pxor %xmm0,%xmm4 .byte 102,68,15,58,15,210,8 - xorl %eax,%esi + xorl %ebx,%esi movl %edx,%edi roll $5,%edx pxor %xmm5,%xmm4 - addl %esi,%ecx - xorl %eax,%edi + xorl %eax,%esi + addl %edx,%ecx movdqa %xmm8,%xmm9 paddd %xmm3,%xmm8 rorl $7,%ebp - addl %edx,%ecx + addl %esi,%ecx pxor %xmm10,%xmm4 addl 4(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm10 movdqa %xmm8,48(%rsp) - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx pslld $2,%xmm4 addl 8(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi psrld $30,%xmm10 movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax por %xmm10,%xmm4 addl 12(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movdqa %xmm4,%xmm8 movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 16(%rsp),%edx pxor %xmm1,%xmm5 .byte 102,68,15,58,15,195,8 - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp pxor %xmm6,%xmm5 - addl %esi,%edx - xorl %ebx,%edi + xorl %ebx,%esi + addl %ebp,%edx movdqa %xmm9,%xmm10 paddd %xmm4,%xmm9 rorl $7,%eax - addl %ebp,%edx + addl %esi,%edx pxor %xmm8,%xmm5 addl 20(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm8 movdqa %xmm9,0(%rsp) - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx pslld $2,%xmm5 addl 24(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi psrld $30,%xmm8 movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx por %xmm8,%xmm5 addl 28(%rsp),%eax + xorl %ebp,%edi movdqa %xmm5,%xmm9 - rorl $7,%ecx movl %ebx,%esi - xorl %edx,%edi roll $5,%ebx - addl %edi,%eax - xorl %ecx,%esi - xorl %edx,%ecx + xorl %edx,%edi addl %ebx,%eax - addl 32(%rsp),%ebp + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi pxor %xmm2,%xmm6 .byte 102,68,15,58,15,204,8 - andl %ecx,%esi xorl %edx,%ecx - rorl $7,%ebx + addl 32(%rsp),%ebp + andl %edx,%edi pxor %xmm7,%xmm6 - movl %eax,%edi - xorl %ecx,%esi + andl %ecx,%esi + rorl $7,%ebx movdqa %xmm10,%xmm8 paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 roll $5,%eax addl %esi,%ebp - pxor %xmm9,%xmm6 - xorl %ebx,%edi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 36(%rsp),%edx movdqa %xmm6,%xmm9 movdqa %xmm10,16(%rsp) - andl %ebx,%edi + movl %ebx,%esi xorl %ecx,%ebx - rorl $7,%eax - movl %ebp,%esi + addl 36(%rsp),%edx + andl %ecx,%esi pslld $2,%xmm6 - xorl %ebx,%edi + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi roll $5,%ebp - psrld $30,%xmm9 addl %edi,%edx - xorl %eax,%esi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 40(%rsp),%ecx - andl %eax,%esi por %xmm9,%xmm6 + movl %eax,%edi xorl %ebx,%eax - rorl $7,%ebp movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx movl %edx,%edi - xorl %eax,%esi roll $5,%edx addl %esi,%ecx - xorl %ebp,%edi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp addl 44(%rsp),%ebx + andl %eax,%esi andl %ebp,%edi - xorl %eax,%ebp rorl $7,%edx + addl %esi,%ebx movl %ecx,%esi - xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %edx,%esi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 48(%rsp),%eax + movl %edx,%edi pxor %xmm3,%xmm7 .byte 102,68,15,58,15,213,8 - andl %edx,%esi xorl %ebp,%edx - rorl $7,%ecx + addl 48(%rsp),%eax + andl %ebp,%edi pxor %xmm0,%xmm7 - movl %ebx,%edi - xorl %edx,%esi - movdqa 32(%r11),%xmm9 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 roll $5,%ebx addl %esi,%eax - pxor %xmm10,%xmm7 - xorl %ecx,%edi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax - addl 52(%rsp),%ebp movdqa %xmm7,%xmm10 movdqa %xmm8,32(%rsp) - andl %ecx,%edi + movl %ecx,%esi xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp movl %eax,%esi - pslld $2,%xmm7 - xorl %ecx,%edi roll $5,%eax - psrld $30,%xmm10 addl %edi,%ebp - xorl %ebx,%esi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 56(%rsp),%edx - andl %ebx,%esi por %xmm10,%xmm7 + movl %ebx,%edi xorl %ecx,%ebx - rorl $7,%eax movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx movl %ebp,%edi - xorl %ebx,%esi roll $5,%ebp addl %esi,%edx - xorl %eax,%edi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax addl 60(%rsp),%ecx + andl %ebx,%esi andl %eax,%edi - xorl %ebx,%eax rorl $7,%ebp + addl %esi,%ecx movl %edx,%esi - xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebp,%esi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 0(%rsp),%ebx + movl %ebp,%edi pxor %xmm4,%xmm0 .byte 102,68,15,58,15,198,8 - andl %ebp,%esi xorl %eax,%ebp - rorl $7,%edx + addl 0(%rsp),%ebx + andl %eax,%edi pxor %xmm1,%xmm0 - movl %ecx,%edi - xorl %ebp,%esi + andl %ebp,%esi + rorl $7,%edx movdqa %xmm9,%xmm10 paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 roll $5,%ecx addl %esi,%ebx - pxor %xmm8,%xmm0 - xorl %edx,%edi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 4(%rsp),%eax movdqa %xmm0,%xmm8 movdqa %xmm9,48(%rsp) - andl %edx,%edi + movl %edx,%esi xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax movl %ebx,%esi - pslld $2,%xmm0 - xorl %edx,%edi roll $5,%ebx - psrld $30,%xmm8 addl %edi,%eax - xorl %ecx,%esi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax - addl 8(%rsp),%ebp - andl %ecx,%esi por %xmm8,%xmm0 + movl %ecx,%edi xorl %edx,%ecx - rorl $7,%ebx movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp movl %eax,%edi - xorl %ecx,%esi roll $5,%eax addl %esi,%ebp - xorl %ebx,%edi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx addl 12(%rsp),%edx + andl %ecx,%esi andl %ebx,%edi - xorl %ecx,%ebx rorl $7,%eax + addl %esi,%edx movl %ebp,%esi - xorl %ebx,%edi roll $5,%ebp addl %edi,%edx - xorl %eax,%esi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 16(%rsp),%ecx + movl %eax,%edi pxor %xmm5,%xmm1 .byte 102,68,15,58,15,207,8 - andl %eax,%esi xorl %ebx,%eax - rorl $7,%ebp + addl 16(%rsp),%ecx + andl %ebx,%edi pxor %xmm2,%xmm1 - movl %edx,%edi - xorl %eax,%esi + andl %eax,%esi + rorl $7,%ebp movdqa %xmm10,%xmm8 paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 roll $5,%edx addl %esi,%ecx - pxor %xmm9,%xmm1 - xorl %ebp,%edi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 20(%rsp),%ebx movdqa %xmm1,%xmm9 movdqa %xmm10,0(%rsp) - andl %ebp,%edi + movl %ebp,%esi xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx movl %ecx,%esi - pslld $2,%xmm1 - xorl %ebp,%edi roll $5,%ecx - psrld $30,%xmm9 addl %edi,%ebx - xorl %edx,%esi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx - addl 24(%rsp),%eax - andl %edx,%esi por %xmm9,%xmm1 + movl %edx,%edi xorl %ebp,%edx - rorl $7,%ecx movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax movl %ebx,%edi - xorl %edx,%esi roll $5,%ebx addl %esi,%eax - xorl %ecx,%edi - xorl %edx,%ecx + xorl %ebp,%edx addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx addl 28(%rsp),%ebp + andl %edx,%esi andl %ecx,%edi - xorl %edx,%ecx rorl $7,%ebx + addl %esi,%ebp movl %eax,%esi - xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %ebx,%esi - xorl %ecx,%ebx + xorl %edx,%ecx addl %eax,%ebp - addl 32(%rsp),%edx + movl %ebx,%edi pxor %xmm6,%xmm2 .byte 102,68,15,58,15,208,8 - andl %ebx,%esi xorl %ecx,%ebx - rorl $7,%eax + addl 32(%rsp),%edx + andl %ecx,%edi pxor %xmm3,%xmm2 - movl %ebp,%edi - xorl %ebx,%esi + andl %ebx,%esi + rorl $7,%eax movdqa %xmm8,%xmm9 paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 roll $5,%ebp addl %esi,%edx - pxor %xmm10,%xmm2 - xorl %eax,%edi - xorl %ebx,%eax + xorl %ecx,%ebx addl %ebp,%edx - addl 36(%rsp),%ecx movdqa %xmm2,%xmm10 movdqa %xmm8,16(%rsp) - andl %eax,%edi + movl %eax,%esi xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx movl %edx,%esi - pslld $2,%xmm2 - xorl %eax,%edi roll $5,%edx - psrld $30,%xmm10 addl %edi,%ecx - xorl %ebp,%esi - xorl %eax,%ebp + xorl %ebx,%eax addl %edx,%ecx - addl 40(%rsp),%ebx - andl %ebp,%esi por %xmm10,%xmm2 + movl %ebp,%edi xorl %eax,%ebp - rorl $7,%edx movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx movl %ecx,%edi - xorl %ebp,%esi roll $5,%ecx addl %esi,%ebx - xorl %edx,%edi - xorl %ebp,%edx + xorl %eax,%ebp addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx addl 44(%rsp),%eax + andl %ebp,%esi andl %edx,%edi - xorl %ebp,%edx rorl $7,%ecx + addl %esi,%eax movl %ebx,%esi - xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %edx,%esi + xorl %ebp,%edx addl %ebx,%eax addl 48(%rsp),%ebp pxor %xmm7,%xmm3 .byte 102,68,15,58,15,193,8 - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax pxor %xmm4,%xmm3 - addl %esi,%ebp - xorl %ecx,%edi + xorl %ecx,%esi + addl %eax,%ebp movdqa %xmm9,%xmm10 paddd %xmm2,%xmm9 rorl $7,%ebx - addl %eax,%ebp + addl %esi,%ebp pxor %xmm8,%xmm3 addl 52(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm3,%xmm8 - movdqa %xmm9,32(%rsp) - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm3 addl 56(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi psrld $30,%xmm8 movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx por %xmm8,%xmm3 addl 60(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 0(%rsp),%eax paddd %xmm3,%xmm10 - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax + xorl %edx,%esi movdqa %xmm10,48(%rsp) - xorl %edx,%edi - rorl $7,%ecx addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 4(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 8(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx addl 12(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx cmpq %r10,%r9 je L$done_ssse3 movdqa 64(%r11),%xmm6 - movdqa -64(%r11),%xmm9 + movdqa 0(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -2260,112 +2278,113 @@ L$oop_ssse3: .byte 102,15,56,0,198 addq $64,%r9 addl 16(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi .byte 102,15,56,0,206 movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx paddd %xmm9,%xmm0 - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx - addl 20(%rsp),%eax + rorl $7,%edx + addl %esi,%ebx movdqa %xmm0,0(%rsp) - xorl %edx,%edi - movl %ebx,%esi + addl 20(%rsp),%eax + xorl %ebp,%edi psubd %xmm9,%xmm0 + movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 24(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp addl 28(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 32(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi .byte 102,15,56,0,214 movl %edx,%edi roll $5,%edx - addl %esi,%ecx paddd %xmm9,%xmm1 - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx - addl 36(%rsp),%ebx + rorl $7,%ebp + addl %esi,%ecx movdqa %xmm1,16(%rsp) - xorl %ebp,%edi - movl %ecx,%esi + addl 36(%rsp),%ebx + xorl %eax,%edi psubd %xmm9,%xmm1 + movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 40(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 44(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 48(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi .byte 102,15,56,0,222 movl %ebp,%edi roll $5,%ebp - addl %esi,%edx paddd %xmm9,%xmm2 - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx - addl 52(%rsp),%ecx + rorl $7,%eax + addl %esi,%edx movdqa %xmm2,32(%rsp) - xorl %eax,%edi - movl %edx,%esi + addl 52(%rsp),%ecx + xorl %ebx,%edi psubd %xmm9,%xmm2 + movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 56(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 60(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2375,110 +2394,108 @@ L$oop_ssse3: movl %esi,4(%r8) movl %esi,%ebx movl %ecx,8(%r8) - movl %ecx,%edi movl %edx,12(%r8) - xorl %edx,%edi movl %ebp,16(%r8) - andl %edi,%esi jmp L$oop_ssse3 .p2align 4 L$done_ssse3: addl 16(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 20(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - xorl %edx,%esi - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 24(%rsp),%ebp - xorl %ecx,%esi + xorl %edx,%esi movl %eax,%edi roll $5,%eax - addl %esi,%ebp - xorl %ecx,%edi - rorl $7,%ebx + xorl %ecx,%esi addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp addl 28(%rsp),%edx - xorl %ebx,%edi + xorl %ecx,%edi movl %ebp,%esi roll $5,%ebp - addl %edi,%edx - xorl %ebx,%esi - rorl $7,%eax + xorl %ebx,%edi addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx addl 32(%rsp),%ecx - xorl %eax,%esi + xorl %ebx,%esi movl %edx,%edi roll $5,%edx - addl %esi,%ecx - xorl %eax,%edi - rorl $7,%ebp + xorl %eax,%esi addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx addl 36(%rsp),%ebx - xorl %ebp,%edi + xorl %eax,%edi movl %ecx,%esi roll $5,%ecx - addl %edi,%ebx - xorl %ebp,%esi - rorl $7,%edx + xorl %ebp,%edi addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx addl 40(%rsp),%eax - xorl %edx,%esi + xorl %ebp,%esi movl %ebx,%edi roll $5,%ebx - addl %esi,%eax - xorl %edx,%edi - rorl $7,%ecx + xorl %edx,%esi addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax addl 44(%rsp),%ebp - xorl %ecx,%edi + xorl %edx,%edi movl %eax,%esi roll $5,%eax - addl %edi,%ebp - xorl %ecx,%esi - rorl $7,%ebx + xorl %ecx,%edi addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp addl 48(%rsp),%edx - xorl %ebx,%esi + xorl %ecx,%esi movl %ebp,%edi roll $5,%ebp - addl %esi,%edx - xorl %ebx,%edi - rorl $7,%eax + xorl %ebx,%esi addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx addl 52(%rsp),%ecx - xorl %eax,%edi + xorl %ebx,%edi movl %edx,%esi roll $5,%edx - addl %edi,%ecx - xorl %eax,%esi - rorl $7,%ebp + xorl %eax,%edi addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx addl 56(%rsp),%ebx - xorl %ebp,%esi + xorl %eax,%esi movl %ecx,%edi roll $5,%ecx - addl %esi,%ebx - xorl %ebp,%edi - rorl $7,%edx + xorl %ebp,%esi addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx addl 60(%rsp),%eax - xorl %edx,%edi + xorl %ebp,%edi movl %ebx,%esi roll $5,%ebx - addl %edi,%eax - rorl $7,%ecx + xorl %edx,%edi addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2499,17 +2516,11 @@ L$epilogue_ssse3: .p2align 6 K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha256-ssse3-x86.s b/lib/accelerated/x86/macosx/sha256-ssse3-x86.s index 0e863d5d78..36e0a06f18 100644 --- a/lib/accelerated/x86/macosx/sha256-ssse3-x86.s +++ b/lib/accelerated/x86/macosx/sha256-ssse3-x86.s @@ -63,405 +63,195 @@ L000pic_point: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz L002loop - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je L003loop_shrd - subl %edi,%eax - cmpl $256,%eax - jae L004unrolled - jmp L002loop .align 4,0x90 L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx - bswap %eax movl 12(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx - bswap %eax movl 28(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx - bswap %eax movl 44(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx - bswap %eax movl 60(%edi),%edx + bswap %eax bswap %ebx - pushl %eax bswap %ecx - pushl %ebx bswap %edx + pushl %eax + pushl %ebx pushl %ecx pushl %edx addl $64,%edi - leal -36(%esp),%esp - movl %edi,104(%esp) + subl $32,%esp + movl %edi,100(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi - movl %ebx,8(%esp) - xorl %ecx,%ebx - movl %ecx,12(%esp) - movl %edi,16(%esp) - movl %ebx,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edi,12(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - movl %edi,32(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edi,28(%esp) .align 4,0x90 -L00500_15: +L00300_15: + movl 92(%esp),%ebx movl %edx,%ecx - movl 24(%esp),%esi rorl $14,%ecx - movl 28(%esp),%edi + movl 20(%esp),%esi xorl %edx,%ecx - xorl %edi,%esi - movl 96(%esp),%ebx rorl $5,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx xorl %edi,%esi - rorl $6,%edx + movl %edx,16(%esp) movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi addl %esi,%ebx rorl $9,%ecx - addl %edx,%ebx - movl 8(%esp),%edi + addl 28(%esp),%ebx xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp rorl $11,%ecx - movl (%ebp),%esi + movl 4(%esp),%esi xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax rorl $2,%ecx - addl %esi,%ebx - movl %eax,(%esp) addl %ebx,%edx - andl 4(%esp),%eax + movl 8(%esp),%edi addl %ecx,%ebx - xorl %edi,%eax + movl %eax,(%esp) + movl %eax,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax + movl (%ebp),%esi + orl %ecx,%eax addl $4,%ebp addl %ebx,%eax + addl %esi,%edx + addl %esi,%eax cmpl $3248222580,%esi - jne L00500_15 - movl 156(%esp),%ecx - jmp L00616_63 + jne L00300_15 + movl 152(%esp),%ebx .align 4,0x90 -L00616_63: - movl %ecx,%ebx - movl 104(%esp),%esi - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx +L00416_63: + movl %ebx,%esi + movl 100(%esp),%ecx + rorl $11,%esi + movl %ecx,%edi + xorl %ebx,%esi + rorl $7,%esi shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 160(%esp),%ebx - shrl $10,%edi - addl 124(%esp),%ebx + rorl $2,%edi + xorl %esi,%ebx + xorl %ecx,%edi + rorl $17,%edi + shrl $10,%ecx + addl 156(%esp),%ebx + xorl %ecx,%edi + addl 120(%esp),%ebx movl %edx,%ecx - xorl %esi,%edi - movl 24(%esp),%esi - rorl $14,%ecx addl %edi,%ebx - movl 28(%esp),%edi + rorl $14,%ecx + movl 20(%esp),%esi xorl %edx,%ecx - xorl %edi,%esi - movl %ebx,96(%esp) rorl $5,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx + movl %ebx,92(%esp) + xorl %edx,%ecx + rorl $6,%ecx + movl 24(%esp),%edi + addl %ecx,%ebx xorl %edi,%esi - rorl $6,%edx + movl %edx,16(%esp) movl %eax,%ecx + andl %edx,%esi + movl 12(%esp),%edx + xorl %edi,%esi + movl %eax,%edi addl %esi,%ebx rorl $9,%ecx - addl %edx,%ebx - movl 8(%esp),%edi + addl 28(%esp),%ebx xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp rorl $11,%ecx - movl (%ebp),%esi + movl 4(%esp),%esi xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax rorl $2,%ecx - addl %esi,%ebx - movl %eax,(%esp) addl %ebx,%edx - andl 4(%esp),%eax - addl %ecx,%ebx - xorl %edi,%eax - movl 156(%esp),%ecx - addl $4,%ebp - addl %ebx,%eax - cmpl $3329325298,%esi - jne L00616_63 - movl 356(%esp),%esi - movl 8(%esp),%ebx - movl 16(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl 24(%esp),%eax - movl 28(%esp),%ebx - movl 32(%esp),%ecx - movl 360(%esp),%edi - addl 16(%esi),%edx - addl 20(%esi),%eax - addl 24(%esi),%ebx - addl 28(%esi),%ecx - movl %edx,16(%esi) - movl %eax,20(%esi) - movl %ebx,24(%esi) - movl %ecx,28(%esi) - leal 356(%esp),%esp - subl $256,%ebp - cmpl 8(%esp),%edi - jb L002loop - movl 12(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 5,0x90 -L003loop_shrd: - movl (%edi),%eax - movl 4(%edi),%ebx - movl 8(%edi),%ecx - bswap %eax - movl 12(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 16(%edi),%eax - movl 20(%edi),%ebx - movl 24(%edi),%ecx - bswap %eax - movl 28(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 32(%edi),%eax - movl 36(%edi),%ebx - movl 40(%edi),%ecx - bswap %eax - movl 44(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - movl 48(%edi),%eax - movl 52(%edi),%ebx - movl 56(%edi),%ecx - bswap %eax - movl 60(%edi),%edx - bswap %ebx - pushl %eax - bswap %ecx - pushl %ebx - bswap %edx - pushl %ecx - pushl %edx - addl $64,%edi - leal -36(%esp),%esp - movl %edi,104(%esp) - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edi - movl %ebx,8(%esp) - xorl %ecx,%ebx - movl %ecx,12(%esp) - movl %edi,16(%esp) - movl %ebx,(%esp) - movl 16(%esi),%edx - movl 20(%esi),%ebx - movl 24(%esi),%ecx - movl 28(%esi),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - movl %edi,32(%esp) -.align 4,0x90 -L00700_15_shrd: - movl %edx,%ecx - movl 24(%esp),%esi - shrdl $14,%ecx,%ecx - movl 28(%esp),%edi - xorl %edx,%ecx - xorl %edi,%esi - movl 96(%esp),%ebx - shrdl $5,%ecx,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx - xorl %edi,%esi - shrdl $6,%edx,%edx - movl %eax,%ecx - addl %esi,%ebx - shrdl $9,%ecx,%ecx - addl %edx,%ebx movl 8(%esp),%edi - xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp - shrdl $11,%ecx,%ecx - movl (%ebp),%esi - xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %esi,%ebx - movl %eax,(%esp) - addl %ebx,%edx - andl 4(%esp),%eax addl %ecx,%ebx - xorl %edi,%eax - addl $4,%ebp - addl %ebx,%eax - cmpl $3248222580,%esi - jne L00700_15_shrd - movl 156(%esp),%ecx - jmp L00816_63_shrd -.align 4,0x90 -L00816_63_shrd: - movl %ecx,%ebx - movl 104(%esp),%esi - shrdl $11,%ecx,%ecx - movl %esi,%edi - shrdl $2,%esi,%esi - xorl %ebx,%ecx - shrl $3,%ebx - shrdl $7,%ecx,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - shrdl $17,%esi,%esi - addl 160(%esp),%ebx - shrl $10,%edi - addl 124(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 24(%esp),%esi - shrdl $14,%ecx,%ecx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %edx,%ecx - xorl %edi,%esi - movl %ebx,96(%esp) - shrdl $5,%ecx,%ecx - andl %edx,%esi - movl %edx,20(%esp) - xorl %ecx,%edx - addl 32(%esp),%ebx - xorl %edi,%esi - shrdl $6,%edx,%edx + movl %eax,(%esp) movl %eax,%ecx - addl %esi,%ebx - shrdl $9,%ecx,%ecx - addl %edx,%ebx - movl 8(%esp),%edi - xorl %eax,%ecx - movl %eax,4(%esp) - leal -4(%esp),%esp - shrdl $11,%ecx,%ecx + subl $4,%esp + orl %esi,%eax + andl %esi,%ecx + andl %edi,%eax movl (%ebp),%esi - xorl %eax,%ecx - movl 20(%esp),%edx - xorl %edi,%eax - shrdl $2,%ecx,%ecx - addl %esi,%ebx - movl %eax,(%esp) - addl %ebx,%edx - andl 4(%esp),%eax - addl %ecx,%ebx - xorl %edi,%eax - movl 156(%esp),%ecx + orl %ecx,%eax addl $4,%ebp addl %ebx,%eax + movl 152(%esp),%ebx + addl %esi,%edx + addl %esi,%eax cmpl $3329325298,%esi - jne L00816_63_shrd - movl 356(%esp),%esi - movl 8(%esp),%ebx - movl 16(%esp),%ecx + jne L00416_63 + movl 352(%esp),%esi + movl 4(%esp),%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edi addl (%esi),%eax addl 4(%esi),%ebx - addl 8(%esi),%edi - addl 12(%esi),%ecx + addl 8(%esi),%ecx + addl 12(%esi),%edi movl %eax,(%esi) movl %ebx,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl 24(%esp),%eax - movl 28(%esp),%ebx - movl 32(%esp),%ecx - movl 360(%esp),%edi + movl %ecx,8(%esi) + movl %edi,12(%esi) + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 356(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx @@ -470,10 +260,10 @@ L00816_63_shrd: movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) - leal 356(%esp),%esp + addl $352,%esp subl $256,%ebp cmpl 8(%esp),%edi - jb L003loop_shrd + jb L002loop movl 12(%esp),%esp popl %edi popl %esi @@ -482,2924 +272,25 @@ L00816_63_shrd: ret .align 6,0x90 L001K256: -.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 -.long 66051,67438087,134810123,202182159 +.long 1116352408,1899447441,3049323471,3921009573 +.long 961987163,1508970993,2453635748,2870763221 +.long 3624381080,310598401,607225278,1426881987 +.long 1925078388,2162078206,2614888103,3248222580 +.long 3835390401,4022224774,264347078,604807628 +.long 770255983,1249150122,1555081692,1996064986 +.long 2554220882,2821834349,2952996808,3210313671 +.long 3336571891,3584528711,113926993,338241895 +.long 666307205,773529912,1294757372,1396182291 +.long 1695183700,1986661051,2177026350,2456956037 +.long 2730485921,2820302411,3259730800,3345764771 +.long 3516065817,3600352804,4094571909,275423344 +.long 430227734,506948616,659060556,883997877 +.long 958139571,1322822218,1537002063,1747873779 +.long 1955562222,2024104815,2227730452,2361852424 +.long 2428436474,2756734187,3204031479,3329325298 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.align 4,0x90 -L004unrolled: - leal -96(%esp),%esp - movl (%esi),%eax - movl 4(%esi),%ebp - movl 8(%esi),%ecx - movl 12(%esi),%ebx - movl %ebp,4(%esp) - xorl %ecx,%ebp - movl %ecx,8(%esp) - movl %ebx,12(%esp) - movl 16(%esi),%edx - movl 20(%esi),%ebx - movl 24(%esi),%ecx - movl 28(%esi),%esi - movl %ebx,20(%esp) - movl %ecx,24(%esp) - movl %esi,28(%esp) - jmp L009grand_loop -.align 4,0x90 -L009grand_loop: - movl (%edi),%ebx - movl 4(%edi),%ecx - bswap %ebx - movl 8(%edi),%esi - bswap %ecx - movl %ebx,32(%esp) - bswap %esi - movl %ecx,36(%esp) - movl %esi,40(%esp) - movl 12(%edi),%ebx - movl 16(%edi),%ecx - bswap %ebx - movl 20(%edi),%esi - bswap %ecx - movl %ebx,44(%esp) - bswap %esi - movl %ecx,48(%esp) - movl %esi,52(%esp) - movl 24(%edi),%ebx - movl 28(%edi),%ecx - bswap %ebx - movl 32(%edi),%esi - bswap %ecx - movl %ebx,56(%esp) - bswap %esi - movl %ecx,60(%esp) - movl %esi,64(%esp) - movl 36(%edi),%ebx - movl 40(%edi),%ecx - bswap %ebx - movl 44(%edi),%esi - bswap %ecx - movl %ebx,68(%esp) - bswap %esi - movl %ecx,72(%esp) - movl %esi,76(%esp) - movl 48(%edi),%ebx - movl 52(%edi),%ecx - bswap %ebx - movl 56(%edi),%esi - bswap %ecx - movl %ebx,80(%esp) - bswap %esi - movl %ecx,84(%esp) - movl %esi,88(%esp) - movl 60(%edi),%ebx - addl $64,%edi - bswap %ebx - movl %edi,100(%esp) - movl %ebx,92(%esp) - movl %edx,%ecx - movl 20(%esp),%esi - rorl $14,%edx - movl 24(%esp),%edi - xorl %ecx,%edx - movl 32(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1116352408(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 16(%esp),%ecx - rorl $14,%edx - movl 20(%esp),%edi - xorl %esi,%edx - movl 36(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1899447441(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 12(%esp),%esi - rorl $14,%edx - movl 16(%esp),%edi - xorl %ecx,%edx - movl 40(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3049323471(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 8(%esp),%ecx - rorl $14,%edx - movl 12(%esp),%edi - xorl %esi,%edx - movl 44(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3921009573(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 4(%esp),%esi - rorl $14,%edx - movl 8(%esp),%edi - xorl %ecx,%edx - movl 48(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 961987163(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl (%esp),%ecx - rorl $14,%edx - movl 4(%esp),%edi - xorl %esi,%edx - movl 52(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1508970993(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 28(%esp),%esi - rorl $14,%edx - movl (%esp),%edi - xorl %ecx,%edx - movl 56(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2453635748(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 24(%esp),%ecx - rorl $14,%edx - movl 28(%esp),%edi - xorl %esi,%edx - movl 60(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2870763221(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 20(%esp),%esi - rorl $14,%edx - movl 24(%esp),%edi - xorl %ecx,%edx - movl 64(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3624381080(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 16(%esp),%ecx - rorl $14,%edx - movl 20(%esp),%edi - xorl %esi,%edx - movl 68(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 310598401(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 12(%esp),%esi - rorl $14,%edx - movl 16(%esp),%edi - xorl %ecx,%edx - movl 72(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 607225278(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 8(%esp),%ecx - rorl $14,%edx - movl 12(%esp),%edi - xorl %esi,%edx - movl 76(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1426881987(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 4(%esp),%esi - rorl $14,%edx - movl 8(%esp),%edi - xorl %ecx,%edx - movl 80(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1925078388(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl (%esp),%ecx - rorl $14,%edx - movl 4(%esp),%edi - xorl %esi,%edx - movl 84(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2162078206(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl %edx,%ecx - movl 28(%esp),%esi - rorl $14,%edx - movl (%esp),%edi - xorl %ecx,%edx - movl 88(%esp),%ebx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2614888103(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl %edx,%esi - movl 24(%esp),%ecx - rorl $14,%edx - movl 28(%esp),%edi - xorl %esi,%edx - movl 92(%esp),%ebx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3248222580(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3835390401(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 4022224774(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 264347078(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 604807628(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 770255983(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1249150122(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1555081692(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1996064986(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2554220882(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2821834349(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2952996808(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3210313671(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3336571891(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3584528711(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,88(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 113926993(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,92(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 338241895(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 666307205(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 773529912(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1294757372(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1396182291(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1695183700(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1986661051(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2177026350(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2456956037(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2730485921(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2820302411(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3259730800(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3345764771(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3516065817(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3600352804(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,88(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 4094571909(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,92(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 275423344(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 36(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 88(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 32(%esp),%ebx - shrl $10,%edi - addl 68(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,32(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 430227734(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 40(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 92(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 36(%esp),%ebx - shrl $10,%edi - addl 72(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,36(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 506948616(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 44(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 32(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 40(%esp),%ebx - shrl $10,%edi - addl 76(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,40(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 659060556(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 48(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 36(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 44(%esp),%ebx - shrl $10,%edi - addl 80(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,44(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 883997877(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 52(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 40(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 48(%esp),%ebx - shrl $10,%edi - addl 84(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,48(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 958139571(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 56(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 44(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 52(%esp),%ebx - shrl $10,%edi - addl 88(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,52(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1322822218(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 60(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 48(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 56(%esp),%ebx - shrl $10,%edi - addl 92(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - movl %ebx,56(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1537002063(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 64(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 52(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 60(%esp),%ebx - shrl $10,%edi - addl 32(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - movl %ebx,60(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 1747873779(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 68(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 56(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 64(%esp),%ebx - shrl $10,%edi - addl 36(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 20(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 24(%esp),%edi - xorl %ecx,%edx - movl %ebx,64(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,16(%esp) - xorl %ecx,%edx - addl 28(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 4(%esp),%edi - xorl %eax,%ecx - movl %eax,(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 1955562222(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 72(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 12(%esp),%edx - addl %ecx,%ebp - movl 60(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 68(%esp),%ebx - shrl $10,%edi - addl 40(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 16(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 20(%esp),%edi - xorl %esi,%edx - movl %ebx,68(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,12(%esp) - xorl %esi,%edx - addl 24(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl (%esp),%edi - xorl %ebp,%esi - movl %ebp,28(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2024104815(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 76(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 8(%esp),%edx - addl %esi,%eax - movl 64(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 72(%esp),%ebx - shrl $10,%edi - addl 44(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 12(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 16(%esp),%edi - xorl %ecx,%edx - movl %ebx,72(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,8(%esp) - xorl %ecx,%edx - addl 20(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 28(%esp),%edi - xorl %eax,%ecx - movl %eax,24(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2227730452(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 80(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 4(%esp),%edx - addl %ecx,%ebp - movl 68(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 76(%esp),%ebx - shrl $10,%edi - addl 48(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 8(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 12(%esp),%edi - xorl %esi,%edx - movl %ebx,76(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,4(%esp) - xorl %esi,%edx - addl 16(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 24(%esp),%edi - xorl %ebp,%esi - movl %ebp,20(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2361852424(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 84(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl (%esp),%edx - addl %esi,%eax - movl 72(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 80(%esp),%ebx - shrl $10,%edi - addl 52(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 4(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl 8(%esp),%edi - xorl %ecx,%edx - movl %ebx,80(%esp) - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,(%esp) - xorl %ecx,%edx - addl 12(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 20(%esp),%edi - xorl %eax,%ecx - movl %eax,16(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 2428436474(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 88(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 28(%esp),%edx - addl %ecx,%ebp - movl 76(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 84(%esp),%ebx - shrl $10,%edi - addl 56(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl (%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 4(%esp),%edi - xorl %esi,%edx - movl %ebx,84(%esp) - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,28(%esp) - xorl %esi,%edx - addl 8(%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 16(%esp),%edi - xorl %ebp,%esi - movl %ebp,12(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 2756734187(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - movl 92(%esp),%ecx - rorl $2,%esi - addl %edx,%eax - addl 24(%esp),%edx - addl %esi,%eax - movl 80(%esp),%esi - movl %ecx,%ebx - rorl $11,%ecx - movl %esi,%edi - rorl $2,%esi - xorl %ebx,%ecx - shrl $3,%ebx - rorl $7,%ecx - xorl %edi,%esi - xorl %ecx,%ebx - rorl $17,%esi - addl 88(%esp),%ebx - shrl $10,%edi - addl 60(%esp),%ebx - movl %edx,%ecx - xorl %esi,%edi - movl 28(%esp),%esi - rorl $14,%edx - addl %edi,%ebx - movl (%esp),%edi - xorl %ecx,%edx - xorl %edi,%esi - rorl $5,%edx - andl %ecx,%esi - movl %ecx,24(%esp) - xorl %ecx,%edx - addl 4(%esp),%ebx - xorl %esi,%edi - rorl $6,%edx - movl %eax,%ecx - addl %edi,%ebx - rorl $9,%ecx - movl %eax,%esi - movl 12(%esp),%edi - xorl %eax,%ecx - movl %eax,8(%esp) - xorl %edi,%eax - rorl $11,%ecx - andl %eax,%ebp - leal 3204031479(%ebx,%edx,1),%edx - xorl %esi,%ecx - xorl %edi,%ebp - movl 32(%esp),%esi - rorl $2,%ecx - addl %edx,%ebp - addl 20(%esp),%edx - addl %ecx,%ebp - movl 84(%esp),%ecx - movl %esi,%ebx - rorl $11,%esi - movl %ecx,%edi - rorl $2,%ecx - xorl %ebx,%esi - shrl $3,%ebx - rorl $7,%esi - xorl %edi,%ecx - xorl %esi,%ebx - rorl $17,%ecx - addl 92(%esp),%ebx - shrl $10,%edi - addl 64(%esp),%ebx - movl %edx,%esi - xorl %ecx,%edi - movl 24(%esp),%ecx - rorl $14,%edx - addl %edi,%ebx - movl 28(%esp),%edi - xorl %esi,%edx - xorl %edi,%ecx - rorl $5,%edx - andl %esi,%ecx - movl %esi,20(%esp) - xorl %esi,%edx - addl (%esp),%ebx - xorl %ecx,%edi - rorl $6,%edx - movl %ebp,%esi - addl %edi,%ebx - rorl $9,%esi - movl %ebp,%ecx - movl 8(%esp),%edi - xorl %ebp,%esi - movl %ebp,4(%esp) - xorl %edi,%ebp - rorl $11,%esi - andl %ebp,%eax - leal 3329325298(%ebx,%edx,1),%edx - xorl %ecx,%esi - xorl %edi,%eax - rorl $2,%esi - addl %edx,%eax - addl 16(%esp),%edx - addl %esi,%eax - movl 96(%esp),%esi - xorl %edi,%ebp - movl 12(%esp),%ecx - addl (%esi),%eax - addl 4(%esi),%ebp - addl 8(%esi),%edi - addl 12(%esi),%ecx - movl %eax,(%esi) - movl %ebp,4(%esi) - movl %edi,8(%esi) - movl %ecx,12(%esi) - movl %ebp,4(%esp) - xorl %edi,%ebp - movl %edi,8(%esp) - movl %ecx,12(%esp) - movl 20(%esp),%edi - movl 24(%esp),%ebx - movl 28(%esp),%ecx - addl 16(%esi),%edx - addl 20(%esi),%edi - addl 24(%esi),%ebx - addl 28(%esi),%ecx - movl %edx,16(%esi) - movl %edi,20(%esi) - movl %ebx,24(%esi) - movl %ecx,28(%esi) - movl %edi,20(%esp) - movl 100(%esp),%edi - movl %ebx,24(%esp) - movl %ecx,28(%esp) - cmpl 104(%esp),%edi - jb L009grand_loop - movl 108(%esp),%esp - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.section __IMPORT,__pointers,non_lazy_symbol_pointers -L__gnutls_x86_cpuid_s$non_lazy_ptr: -.indirect_symbol __gnutls_x86_cpuid_s -.long 0 -.comm __gnutls_x86_cpuid_s,16,2 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86.s index 2294b4ad13..0014a8116b 100644 --- a/lib/accelerated/x86/macosx/sha512-ssse3-x86.s +++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86.s @@ -593,12 +593,9 @@ L001K512: .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 -.long 67438087,66051 -.long 202182159,134810123 .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s index 99102b5d54..23da5e6e39 100644 --- a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s +++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s @@ -39,17 +39,10 @@ # .text - .globl _sha256_block_data_order .p2align 4 _sha256_block_data_order: - leaq __gnutls_x86_cpuid_s(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - testl $512,%r10d - jnz L$ssse3_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -67,6 +60,8 @@ _sha256_block_data_order: movq %r11,64+24(%rsp) L$prologue: + leaq K256(%rip),%rbp + movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx @@ -79,1632 +74,1694 @@ L$prologue: .p2align 4 L$loop: - movl %ebx,%edi - leaq K256(%rip),%rbp - xorl %ecx,%edi + xorq %rdi,%rdi movl 0(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r11d + movl 4(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,4(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r10d + movl 8(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r9d + movl 12(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,12(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r8d + movl 16(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%edx + movl 20(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,20(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ecx + movl 24(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ebx + movl 28(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,28(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%eax + movl 32(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r11d + movl 36(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,36(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r10d + movl 40(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r9d + movl 44(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,44(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - - leaq 20(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%r8d + movl 48(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%edx + movl 52(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,52(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ecx + movl 56(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - - leaq 4(%rbp),%rbp + leaq 1(%rdi),%rdi addl %r14d,%ebx + movl 60(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,60(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp jmp L$rounds_16_xx .p2align 4 L$rounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r15d - + movl 56(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%eax - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 36(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 0(%rsp),%r12d movl %r8d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,0(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d - leaq 4(%rbp),%rbp movl 8(%rsp),%r13d - movl 60(%rsp),%edi - + movl 60(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r11d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 40(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 4(%rsp),%r12d movl %edx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,4(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d - leaq 4(%rbp),%rbp movl 12(%rsp),%r13d - movl 0(%rsp),%r15d - + movl 0(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r10d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 44(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 8(%rsp),%r12d movl %ecx,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d + movl %r12d,8(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d - leaq 4(%rbp),%rbp movl 16(%rsp),%r13d - movl 4(%rsp),%edi - + movl 4(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r9d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 48(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 12(%rsp),%r12d movl %ebx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,12(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d - leaq 20(%rbp),%rbp movl 20(%rsp),%r13d - movl 8(%rsp),%r15d - + movl 8(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r8d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 52(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 16(%rsp),%r12d movl %eax,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,16(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx - leaq 4(%rbp),%rbp movl 24(%rsp),%r13d - movl 12(%rsp),%edi - + movl 12(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%edx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 56(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 20(%rsp),%r12d movl %r11d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,20(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx - leaq 4(%rbp),%rbp movl 28(%rsp),%r13d - movl 16(%rsp),%r15d - + movl 16(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ecx - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%r15d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 60(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 24(%rsp),%r12d movl %r10d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,24(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx - leaq 4(%rbp),%rbp movl 32(%rsp),%r13d - movl 20(%rsp),%edi - + movl 20(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ebx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 0(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 28(%rsp),%r12d movl %r9d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,28(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp movl 36(%rsp),%r13d - movl 24(%rsp),%r15d - + movl 24(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%eax - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 4(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 32(%rsp),%r12d movl %r8d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d + movl %r12d,32(%rsp) - xorl %r8d,%r13d rorl $9,%r14d + xorl %r8d,%r13d xorl %r10d,%r15d - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - rorl $5,%r13d addl %r11d,%r12d - xorl %r10d,%r15d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d rorl $11,%r14d xorl %r8d,%r13d - addl %r15d,%r12d + xorl %r10d,%r15d - movl %eax,%r15d - addl (%rbp),%r12d + xorl %ecx,%r11d xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d - xorl %ebx,%r15d rorl $6,%r13d - movl %ebx,%r11d + andl %eax,%r11d + andl %ecx,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r11d - xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d - leaq 4(%rbp),%rbp movl 40(%rsp),%r13d - movl 28(%rsp),%edi - + movl 28(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r11d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 8(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 36(%rsp),%r12d movl %edx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%edi + movl %r8d,%r15d + movl %r12d,36(%rsp) - xorl %edx,%r13d rorl $9,%r14d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi + xorl %edx,%r13d + xorl %r9d,%r15d rorl $5,%r13d addl %r10d,%r12d - xorl %r9d,%edi + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d rorl $11,%r14d xorl %edx,%r13d - addl %edi,%r12d + xorl %r9d,%r15d - movl %r11d,%edi - addl (%rbp),%r12d + xorl %ebx,%r10d xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d - xorl %eax,%edi rorl $6,%r13d - movl %eax,%r10d + andl %r11d,%r10d + andl %ebx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r10d - xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d - leaq 4(%rbp),%rbp movl 44(%rsp),%r13d - movl 32(%rsp),%r15d - + movl 32(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r10d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 12(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 40(%rsp),%r12d movl %ecx,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d + movl %r12d,40(%rsp) - xorl %ecx,%r13d rorl $9,%r14d + xorl %ecx,%r13d xorl %r8d,%r15d - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - rorl $5,%r13d addl %r9d,%r12d - xorl %r8d,%r15d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d rorl $11,%r14d xorl %ecx,%r13d - addl %r15d,%r12d + xorl %r8d,%r15d - movl %r10d,%r15d - addl (%rbp),%r12d + xorl %eax,%r9d xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d - xorl %r11d,%r15d rorl $6,%r13d - movl %r11d,%r9d + andl %r10d,%r9d + andl %eax,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r9d - xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d - leaq 4(%rbp),%rbp movl 48(%rsp),%r13d - movl 36(%rsp),%edi - + movl 36(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r9d - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 16(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 44(%rsp),%r12d movl %ebx,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%edi + movl %ecx,%r15d + movl %r12d,44(%rsp) - xorl %ebx,%r13d rorl $9,%r14d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi + xorl %ebx,%r13d + xorl %edx,%r15d rorl $5,%r13d addl %r8d,%r12d - xorl %edx,%edi + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d rorl $11,%r14d xorl %ebx,%r13d - addl %edi,%r12d + xorl %edx,%r15d - movl %r9d,%edi - addl (%rbp),%r12d + xorl %r11d,%r8d xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d - xorl %r10d,%edi rorl $6,%r13d - movl %r10d,%r8d + andl %r9d,%r8d + andl %r11d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%r8d - xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d - leaq 20(%rbp),%rbp movl 52(%rsp),%r13d - movl 40(%rsp),%r15d - + movl 40(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%r8d - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 20(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 48(%rsp),%r12d movl %eax,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d + movl %r12d,48(%rsp) - xorl %eax,%r13d rorl $9,%r14d + xorl %eax,%r13d xorl %ecx,%r15d - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - rorl $5,%r13d addl %edx,%r12d - xorl %ecx,%r15d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx rorl $11,%r14d xorl %eax,%r13d - addl %r15d,%r12d + xorl %ecx,%r15d - movl %r8d,%r15d - addl (%rbp),%r12d + xorl %r10d,%edx xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d - xorl %r9d,%r15d rorl $6,%r13d - movl %r9d,%edx + andl %r8d,%edx + andl %r10d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%edx - xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx - leaq 4(%rbp),%rbp movl 56(%rsp),%r13d - movl 44(%rsp),%edi - + movl 44(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%edx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 24(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 52(%rsp),%r12d movl %r11d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%edi + movl %eax,%r15d + movl %r12d,52(%rsp) - xorl %r11d,%r13d rorl $9,%r14d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi + xorl %r11d,%r13d + xorl %ebx,%r15d rorl $5,%r13d addl %ecx,%r12d - xorl %ebx,%edi + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx rorl $11,%r14d xorl %r11d,%r13d - addl %edi,%r12d + xorl %ebx,%r15d - movl %edx,%edi - addl (%rbp),%r12d + xorl %r9d,%ecx xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d - xorl %r8d,%edi rorl $6,%r13d - movl %r8d,%ecx + andl %edx,%ecx + andl %r9d,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ecx - xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx - leaq 4(%rbp),%rbp movl 60(%rsp),%r13d - movl 48(%rsp),%r15d - + movl 48(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ecx - movl %r15d,%r14d - rorl $2,%r15d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - xorl %r13d,%r12d - xorl %r14d,%r15d - addl 28(%rsp),%r12d + addl %r13d,%r12d + xorl %r15d,%r14d addl 56(%rsp),%r12d movl %r10d,%r13d - addl %r15d,%r12d + addl %r14d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d + movl %r12d,56(%rsp) - xorl %r10d,%r13d rorl $9,%r14d + xorl %r10d,%r13d xorl %eax,%r15d - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - rorl $5,%r13d addl %ebx,%r12d - xorl %eax,%r15d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx rorl $11,%r14d xorl %r10d,%r13d - addl %r15d,%r12d + xorl %eax,%r15d - movl %ecx,%r15d - addl (%rbp),%r12d + xorl %r8d,%ebx xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d - xorl %edx,%r15d rorl $6,%r13d - movl %edx,%ebx + andl %ecx,%ebx + andl %r8d,%r15d - andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%ebx - xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx - leaq 4(%rbp),%rbp movl 0(%rsp),%r13d - movl 52(%rsp),%edi - + movl 52(%rsp),%r14d movl %r13d,%r12d - rorl $11,%r13d - addl %r14d,%ebx - movl %edi,%r14d - rorl $2,%edi + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + rorl $7,%r12d xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r14d,%edi + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d shrl $10,%r14d - rorl $17,%edi - xorl %r13d,%r12d - xorl %r14d,%edi - addl 32(%rsp),%r12d + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d addl 60(%rsp),%r12d movl %r9d,%r13d - addl %edi,%r12d + addl %r14d,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%edi + movl %r10d,%r15d + movl %r12d,60(%rsp) - xorl %r9d,%r13d rorl $9,%r14d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi + xorl %r9d,%r13d + xorl %r11d,%r15d rorl $5,%r13d addl %eax,%r12d - xorl %r11d,%edi + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax rorl $11,%r14d xorl %r9d,%r13d - addl %edi,%r12d + xorl %r11d,%r15d - movl %ebx,%edi - addl (%rbp),%r12d + xorl %edx,%eax xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d - xorl %ecx,%edi rorl $6,%r13d - movl %ecx,%eax + andl %ebx,%eax + andl %edx,%r15d - andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d + addl %r15d,%eax - xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax - leaq 20(%rbp),%rbp - cmpb $0,3(%rbp) - jnz L$rounds_16_xx + cmpq $64,%rdi + jb L$rounds_16_xx movq 64+0(%rsp),%rdi - addl %r14d,%eax leaq 64(%rsi),%rsi addl 0(%rdi),%eax @@ -1743,1139 +1800,19 @@ L$epilogue: K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 - -.p2align 6 -sha256_block_data_order_ssse3: -L$ssse3_shortcut: - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - movq %rsp,%r11 - shlq $4,%rdx - subq $96,%rsp - leaq (%rsi,%rdx,4),%rdx - andq $-64,%rsp - movq %rdi,64+0(%rsp) - movq %rsi,64+8(%rsp) - movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) -L$prologue_ssse3: - - movl 0(%rdi),%eax - movl 4(%rdi),%ebx - movl 8(%rdi),%ecx - movl 12(%rdi),%edx - movl 16(%rdi),%r8d - movl 20(%rdi),%r9d - movl 24(%rdi),%r10d - movl 28(%rdi),%r11d - - - jmp L$loop_ssse3 -.p2align 4 -L$loop_ssse3: - movdqa K256+512(%rip),%xmm7 - movdqu 0(%rsi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqu 32(%rsi),%xmm2 - movdqu 48(%rsi),%xmm3 -.byte 102,15,56,0,199 - leaq K256(%rip),%rbp -.byte 102,15,56,0,207 - movdqa 0(%rbp),%xmm4 -.byte 102,15,56,0,215 - movdqa 32(%rbp),%xmm5 - paddd %xmm0,%xmm4 - movdqa 64(%rbp),%xmm6 -.byte 102,15,56,0,223 - movdqa 96(%rbp),%xmm7 - paddd %xmm1,%xmm5 - paddd %xmm2,%xmm6 - paddd %xmm3,%xmm7 - movdqa %xmm4,0(%rsp) - movl %eax,%r14d - movdqa %xmm5,16(%rsp) - movl %ebx,%edi - movdqa %xmm6,32(%rsp) - xorl %ecx,%edi - movdqa %xmm7,48(%rsp) - movl %r8d,%r13d - jmp L$ssse3_00_47 - -.p2align 4 -L$ssse3_00_47: - subq $-32*4,%rbp - rorl $14,%r13d - movdqa %xmm1,%xmm4 - movl %r14d,%eax - movl %r9d,%r12d - movdqa %xmm3,%xmm7 - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,224,4 - andl %r8d,%r12d - xorl %r8d,%r13d -.byte 102,15,58,15,250,4 - addl 0(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - addl %r12d,%r11d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm0 - rorl $2,%r14d - addl %r11d,%edx - psrld $7,%xmm6 - addl %edi,%r11d - movl %edx,%r13d - pshufd $250,%xmm3,%xmm7 - addl %r11d,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%r11d - movl %r8d,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 4(%rsp),%r10d - movl %r11d,%edi - pxor %xmm6,%xmm4 - xorl %r9d,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - addl %r12d,%r10d - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm0 - rorl $2,%r14d - addl %r10d,%ecx - psrlq $17,%xmm6 - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %ecx,%r13d - xorl %r8d,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - psrldq $8,%xmm7 - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - paddd %xmm7,%xmm0 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - pshufd $80,%xmm0,%xmm7 - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - movdqa %xmm7,%xmm6 - addl %edi,%r9d - movl %ebx,%r13d - psrld $10,%xmm7 - addl %r9d,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - psrlq $2,%xmm6 - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - pxor %xmm6,%xmm7 - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %r10d,%edi - addl %r12d,%r8d - movdqa 0(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - paddd %xmm7,%xmm0 - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - paddd %xmm0,%xmm6 - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,0(%rsp) - rorl $14,%r13d - movdqa %xmm2,%xmm4 - movl %r14d,%r8d - movl %ebx,%r12d - movdqa %xmm0,%xmm7 - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,225,4 - andl %eax,%r12d - xorl %eax,%r13d -.byte 102,15,58,15,251,4 - addl 16(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - addl %r12d,%edx - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm1 - rorl $2,%r14d - addl %edx,%r11d - psrld $7,%xmm6 - addl %edi,%edx - movl %r11d,%r13d - pshufd $250,%xmm0,%xmm7 - addl %edx,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%edx - movl %eax,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 20(%rsp),%ecx - movl %edx,%edi - pxor %xmm6,%xmm4 - xorl %ebx,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - addl %r12d,%ecx - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm1 - rorl $2,%r14d - addl %ecx,%r10d - psrlq $17,%xmm6 - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r10d,%r13d - xorl %eax,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - psrldq $8,%xmm7 - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - paddd %xmm7,%xmm1 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - pshufd $80,%xmm1,%xmm7 - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - movdqa %xmm7,%xmm6 - addl %edi,%ebx - movl %r9d,%r13d - psrld $10,%xmm7 - addl %ebx,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - psrlq $2,%xmm6 - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - pxor %xmm6,%xmm7 - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %ecx,%edi - addl %r12d,%eax - movdqa 32(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - paddd %xmm7,%xmm1 - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - paddd %xmm1,%xmm6 - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,16(%rsp) - rorl $14,%r13d - movdqa %xmm3,%xmm4 - movl %r14d,%eax - movl %r9d,%r12d - movdqa %xmm1,%xmm7 - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,226,4 - andl %r8d,%r12d - xorl %r8d,%r13d -.byte 102,15,58,15,248,4 - addl 32(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - addl %r12d,%r11d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm2 - rorl $2,%r14d - addl %r11d,%edx - psrld $7,%xmm6 - addl %edi,%r11d - movl %edx,%r13d - pshufd $250,%xmm1,%xmm7 - addl %r11d,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%r11d - movl %r8d,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 36(%rsp),%r10d - movl %r11d,%edi - pxor %xmm6,%xmm4 - xorl %r9d,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - addl %r12d,%r10d - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm2 - rorl $2,%r14d - addl %r10d,%ecx - psrlq $17,%xmm6 - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %ecx,%r13d - xorl %r8d,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - psrldq $8,%xmm7 - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - paddd %xmm7,%xmm2 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - pshufd $80,%xmm2,%xmm7 - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - movdqa %xmm7,%xmm6 - addl %edi,%r9d - movl %ebx,%r13d - psrld $10,%xmm7 - addl %r9d,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - psrlq $2,%xmm6 - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - pxor %xmm6,%xmm7 - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %r10d,%edi - addl %r12d,%r8d - movdqa 64(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - paddd %xmm7,%xmm2 - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - paddd %xmm2,%xmm6 - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,32(%rsp) - rorl $14,%r13d - movdqa %xmm0,%xmm4 - movl %r14d,%r8d - movl %ebx,%r12d - movdqa %xmm2,%xmm7 - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,227,4 - andl %eax,%r12d - xorl %eax,%r13d -.byte 102,15,58,15,249,4 - addl 48(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - addl %r12d,%edx - movdqa %xmm4,%xmm6 - rorl $6,%r13d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm3 - rorl $2,%r14d - addl %edx,%r11d - psrld $7,%xmm6 - addl %edi,%edx - movl %r11d,%r13d - pshufd $250,%xmm2,%xmm7 - addl %edx,%r14d - rorl $14,%r13d - pslld $14,%xmm5 - movl %r14d,%edx - movl %eax,%r12d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - psrld $11,%xmm6 - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 52(%rsp),%ecx - movl %edx,%edi - pxor %xmm6,%xmm4 - xorl %ebx,%r12d - rorl $11,%r14d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - addl %r12d,%ecx - pxor %xmm5,%xmm4 - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm3 - rorl $2,%r14d - addl %ecx,%r10d - psrlq $17,%xmm6 - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - pxor %xmm6,%xmm7 - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r10d,%r13d - xorl %eax,%r12d - pxor %xmm6,%xmm7 - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - pshufd $128,%xmm7,%xmm7 - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - psrldq $8,%xmm7 - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - paddd %xmm7,%xmm3 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - pshufd $80,%xmm3,%xmm7 - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - movdqa %xmm7,%xmm6 - addl %edi,%ebx - movl %r9d,%r13d - psrld $10,%xmm7 - addl %ebx,%r14d - rorl $14,%r13d - psrlq $17,%xmm6 - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - psrlq $2,%xmm6 - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - pxor %xmm6,%xmm7 - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - pshufd $8,%xmm7,%xmm7 - xorl %ecx,%edi - addl %r12d,%eax - movdqa 96(%rbp),%xmm6 - rorl $6,%r13d - andl %edi,%r15d - pslldq $8,%xmm7 - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - paddd %xmm7,%xmm3 - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - paddd %xmm3,%xmm6 - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,48(%rsp) - cmpb $0,131(%rbp) - jne L$ssse3_00_47 - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 0(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - xorl %ebx,%r15d - addl %r12d,%r11d - rorl $6,%r13d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - rorl $2,%r14d - addl %r11d,%edx - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 4(%rsp),%r10d - movl %r11d,%edi - xorl %r9d,%r12d - rorl $11,%r14d - xorl %eax,%edi - addl %r12d,%r10d - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - rorl $2,%r14d - addl %r10d,%ecx - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - xorl %r10d,%edi - addl %r12d,%r8d - rorl $6,%r13d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 16(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - xorl %r9d,%r15d - addl %r12d,%edx - rorl $6,%r13d - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - rorl $2,%r14d - addl %edx,%r11d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 20(%rsp),%ecx - movl %edx,%edi - xorl %ebx,%r12d - rorl $11,%r14d - xorl %r8d,%edi - addl %r12d,%ecx - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - rorl $2,%r14d - addl %ecx,%r10d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - xorl %ecx,%edi - addl %r12d,%eax - rorl $6,%r13d - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 32(%rsp),%r11d - movl %eax,%r15d - xorl %r10d,%r12d - rorl $11,%r14d - xorl %ebx,%r15d - addl %r12d,%r11d - rorl $6,%r13d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - rorl $2,%r14d - addl %r11d,%edx - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 36(%rsp),%r10d - movl %r11d,%edi - xorl %r9d,%r12d - rorl $11,%r14d - xorl %eax,%edi - addl %r12d,%r10d - rorl $6,%r13d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - rorl $2,%r14d - addl %r10d,%ecx - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - xorl %r8d,%r12d - rorl $11,%r14d - xorl %r11d,%r15d - addl %r12d,%r9d - rorl $6,%r13d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - rorl $2,%r14d - addl %r9d,%ebx - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - movl %r9d,%edi - xorl %edx,%r12d - rorl $11,%r14d - xorl %r10d,%edi - addl %r12d,%r8d - rorl $6,%r13d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - rorl $2,%r14d - addl %r8d,%eax - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 48(%rsp),%edx - movl %r8d,%r15d - xorl %ecx,%r12d - rorl $11,%r14d - xorl %r9d,%r15d - addl %r12d,%edx - rorl $6,%r13d - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - rorl $2,%r14d - addl %edx,%r11d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 52(%rsp),%ecx - movl %edx,%edi - xorl %ebx,%r12d - rorl $11,%r14d - xorl %r8d,%edi - addl %r12d,%ecx - rorl $6,%r13d - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - rorl $2,%r14d - addl %ecx,%r10d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - xorl %eax,%r12d - rorl $11,%r14d - xorl %edx,%r15d - addl %r12d,%ebx - rorl $6,%r13d - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - rorl $2,%r14d - addl %ebx,%r9d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - movl %ebx,%edi - xorl %r11d,%r12d - rorl $11,%r14d - xorl %ecx,%edi - addl %r12d,%eax - rorl $6,%r13d - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - rorl $2,%r14d - addl %eax,%r8d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - movq 64+0(%rsp),%rdi - movl %r14d,%eax - - addl 0(%rdi),%eax - leaq 64(%rsi),%rsi - addl 4(%rdi),%ebx - addl 8(%rdi),%ecx - addl 12(%rdi),%edx - addl 16(%rdi),%r8d - addl 20(%rdi),%r9d - addl 24(%rdi),%r10d - addl 28(%rdi),%r11d - - cmpq 64+16(%rsp),%rsi - - movl %eax,0(%rdi) - movl %ebx,4(%rdi) - movl %ecx,8(%rdi) - movl %edx,12(%rdi) - movl %r8d,16(%rdi) - movl %r9d,20(%rdi) - movl %r10d,24(%rdi) - movl %r11d,28(%rdi) - jb L$loop_ssse3 - - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -L$epilogue_ssse3: - .byte 0xf3,0xc3 - - -.section .note.GNU-stack,"",%progbits