2015-01-11 Niels Möller <nisse@lysator.liu.se>
+ * x86_64/aesni/aes-decrypt-internal.asm: New file.
+ * x86_64/aesni/aes-encrypt-internal.asm: New file.
+ * configure.ac: New configure flag --enable-x86-aesni.
+
* aclocal.m4 (LSH_RPATH_INIT): Handle freebsd, in the same way as
gnu/linux, with -Wl,-rpath,.
AC_HELP_STRING([--enable-arm-neon], [Enable ARM Neon assembly. (default=auto)]),,
[enable_arm_neon=auto])
+# Configure switch for the x86_64 AES instruction assembly files.
+# When the option is not given, enable_x86_aesni is set to no.
+AC_ARG_ENABLE(x86-aesni,
+ AC_HELP_STRING([--enable-x86-aesni], [Enable x86_64 aes instructions. (default=no)]),,
+ [enable_x86_aesni=no])
+
AC_ARG_ENABLE(mini-gmp,
AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),,
[enable_mini_gmp=no])
[x86_64 | amd64])
if test "$ABI" = 64 ; then
asm_path=x86_64
+ if test "x$enable_x86_aesni" = xyes ; then
+ asm_path="x86_64/aesni $asm_path"
+ fi
else
asm_path=x86
fi
--- /dev/null
+C x86_64/aesni/aes-decrypt-internal.asm
+
+
+ifelse(<
+ Copyright (C) 2015 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+C Argument registers
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>, <%rsi>)
+C define(<TABLE>, <%rdx>) C Unused here
+define(<LENGTH>,<%rcx>)
+define(<DST>, <%r8>)
+define(<SRC>, <%r9>)
+
+C Scratch registers
+C Rounds remaining for the current block. Shares %rdx with the
+C unused table argument.
+define(<CNT>, <%rdx>)
+C Address of the subkey applied most recently.
+define(<KEY>, <%rax>)
+
+        .arch bdver2
+        .file "aes-decrypt-internal.asm"
+
+        C _aes_decrypt(unsigned rounds, const uint32_t *keys,
+        C              const struct aes_table *T,
+        C              size_t length, uint8_t *dst,
+        C              uint8_t *src)
+        C
+        C Processes length / 16 blocks of 16 bytes each, reading from
+        C src and writing to dst. Any remainder of length below 16
+        C bytes is ignored. For every block, rounds + 1 consecutive
+        C 16-byte subkeys are read starting at keys. The table
+        C argument is never referenced.
+        .text
+        ALIGN(16)
+PROLOGUE(_nettle_aes_decrypt)
+        W64_ENTRY(6, 2)
+
+        C Convert the byte count to a block count, and leave at once
+        C if there is no complete block.
+        shrq    $4, LENGTH
+        testq   LENGTH, LENGTH
+        jz      .Ldone
+
+        C The last round is done by aesdeclast, outside the round
+        C loop, so the loop runs rounds - 1 times.
+        decl    XREG(ROUNDS)
+
+.Lnext_block:
+        C Per-block setup: fetch the input block, rewind the subkey
+        C pointer and reload the round counter.
+        movups  (SRC), %xmm0
+        movq    KEYS, KEY
+        movq    ROUNDS, CNT
+
+        C Initial step: xor the block with the first subkey.
+        C FIXME: Better alignment of subkeys, so we can use movaps.
+        movups  (KEY), %xmm1
+        pxor    %xmm1, %xmm0
+
+        C FIXME: Could use some unrolling. Also all subkeys fit in
+        C registers, so they could be loaded once (on W64 we would
+        C need to save and restore some xmm registers, though).
+
+.Lnext_round:
+        C Step to the next subkey and apply one full round.
+        addq    $16, KEY
+        movups  (KEY), %xmm1
+        aesdec  %xmm1, %xmm0
+        decl    XREG(CNT)
+        jnz     .Lnext_round
+
+        C KEY still addresses the subkey of the last loop iteration,
+        C so the final subkey is the one right after it.
+        movups  16(KEY), %xmm1
+        aesdeclast %xmm1, %xmm0
+
+        C Store the result and advance to the next block. The decq
+        C must come last, since the jnz below tests its zero flag.
+        movups  %xmm0, (DST)
+        addq    $16, DST
+        addq    $16, SRC
+        decq    LENGTH
+        jnz     .Lnext_block
+
+.Ldone:
+        W64_EXIT(6, 2)
+        ret
+EPILOGUE(_nettle_aes_decrypt)
--- /dev/null
+C x86_64/aesni/aes-encrypt-internal.asm
+
+
+ifelse(<
+ Copyright (C) 2015 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+C Argument registers
+define(<ROUNDS>, <%rdi>)
+define(<KEYS>, <%rsi>)
+C define(<TABLE>, <%rdx>) C Unused here
+define(<LENGTH>,<%rcx>)
+define(<DST>, <%r8>)
+define(<SRC>, <%r9>)
+
+C Scratch registers
+C Rounds remaining for the current block. Shares %rdx with the
+C unused table argument.
+define(<CNT>, <%rdx>)
+C Address of the subkey applied most recently.
+define(<KEY>, <%rax>)
+
+        .arch bdver2
+        .file "aes-encrypt-internal.asm"
+
+        C _aes_encrypt(unsigned rounds, const uint32_t *keys,
+        C              const struct aes_table *T,
+        C              size_t length, uint8_t *dst,
+        C              uint8_t *src)
+        C
+        C Processes length / 16 blocks of 16 bytes each, reading from
+        C src and writing to dst. Any remainder of length below 16
+        C bytes is ignored. For every block, rounds + 1 consecutive
+        C 16-byte subkeys are read starting at keys. The table
+        C argument is never referenced.
+        .text
+        ALIGN(16)
+PROLOGUE(_nettle_aes_encrypt)
+        W64_ENTRY(6, 2)
+
+        C Convert the byte count to a block count, and leave at once
+        C if there is no complete block.
+        shrq    $4, LENGTH
+        testq   LENGTH, LENGTH
+        jz      .Ldone
+
+        C The last round is done by aesenclast, outside the round
+        C loop, so the loop runs rounds - 1 times.
+        decl    XREG(ROUNDS)
+
+.Lnext_block:
+        C Per-block setup: fetch the input block, rewind the subkey
+        C pointer and reload the round counter.
+        movups  (SRC), %xmm0
+        movq    KEYS, KEY
+        movq    ROUNDS, CNT
+
+        C Initial step: xor the block with the first subkey.
+        C FIXME: Better alignment of subkeys, so we can use movaps.
+        movups  (KEY), %xmm1
+        pxor    %xmm1, %xmm0
+
+        C FIXME: Could use some unrolling. Also all subkeys fit in
+        C registers, so they could be loaded once (on W64 we would
+        C need to save and restore some xmm registers, though).
+
+.Lnext_round:
+        C Step to the next subkey and apply one full round.
+        addq    $16, KEY
+        movups  (KEY), %xmm1
+        aesenc  %xmm1, %xmm0
+        decl    XREG(CNT)
+        jnz     .Lnext_round
+
+        C KEY still addresses the subkey of the last loop iteration,
+        C so the final subkey is the one right after it.
+        movups  16(KEY), %xmm1
+        aesenclast %xmm1, %xmm0
+
+        C Store the result and advance to the next block. The decq
+        C must come last, since the jnz below tests its zero flag.
+        movups  %xmm0, (DST)
+        addq    $16, DST
+        addq    $16, SRC
+        decq    LENGTH
+        jnz     .Lnext_block
+
+.Ldone:
+        W64_EXIT(6, 2)
+        ret
+EPILOGUE(_nettle_aes_encrypt)