From 3a4e808e1052f28910f0ebc132467b8585f00621 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Niels=20M=C3=B6ller?= Date: Fri, 22 Aug 2014 21:00:14 +0200 Subject: [PATCH] Initial x86_64 implementation of ecc_25519_modp, 30% speedup. --- ChangeLog | 3 ++ x86_64/ecc-25519-modp.asm | 97 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 x86_64/ecc-25519-modp.asm diff --git a/ChangeLog b/ChangeLog index a0d83c88..b76e602e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2014-08-22 Niels Möller + * x86_64/ecc-25519-modp.asm: New file. Initial assembly + implementation, 30% speedup of ecc_25519_modp. + * ecc-25519.c [HAVE_NATIVE_ecc_25519_modp]: Use assembly version if available. diff --git a/x86_64/ecc-25519-modp.asm b/x86_64/ecc-25519-modp.asm new file mode 100644 index 00000000..b09262d1 --- /dev/null +++ b/x86_64/ecc-25519-modp.asm @@ -0,0 +1,97 @@ +C x86_64/ecc-25519-modp.asm + +ifelse(< + Copyright (C) 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+>)
+
+	.file "ecc-25519-modp.asm"
+
+define(<RP>, <%rsi>)	C Eight 64-bit input words, least significant first; four result words are stored back here
+define(<U0>, <%rdi>)	C Overlaps unused ecc input
+define(<U1>, <%rcx>)
+define(<U2>, <%r8>)	C U0, U1, U2: low product words, then the low three words of the sum
+define(<V1>, <%r9>)
+define(<V2>, <%r10>)
+define(<V3>, <%r11>)	C V1, V2, V3: high product words, each added one word position up
+define(<M>, <%r12>)	C Multiplier: 38, halved to 19 for the final fold
+C Replaces the eight words at RP by four words congruent to them modulo 2^255 - 19 (not fully reduced).
+PROLOGUE(nettle_ecc_25519_modp)
+	W64_ENTRY(2, 0)		C Macro defined elsewhere; presumably Win64 entry glue, to be confirmed
+	push	%r12		C M lives in a callee-saved register; restored by the pop before returning
+C 2^256 = 38 (mod 2^255 - 19): multiply words 4..7 by 38, starting with word 4.
+	mov	$38, M
+	mov	32(RP), %rax
+	mul	M		C %rdx:%rax = 38 * word 4
+	mov	%rax, U0
+	mov	%rdx, V1
+C 38 * word 5.
+	mov	40(RP), %rax
+	mul	M
+	mov	%rax, U1
+	mov	%rdx, V2
+C 38 * word 6.
+	mov	48(RP), %rax
+	mul	M
+	mov	%rax, U2
+	mov	%rdx, V3
+C 38 * word 7, left in %rdx:%rax.
+	mov	56(RP), %rax
+	mul	M
+C Add each high product word into the low word of the next product.
+	add	V1, U1
+	adc	V2, U2
+	adc	V3, %rax
+	adc	$0, %rdx
+C Add input words 0..3; the five-word sum is U0, U1, U2, %rax, %rdx (low to high).
+	shr	M		C M = 38 / 2 = 19, since 2^255 = 19 (mod 2^255 - 19)
+	C FIXME: Load and add earlier?
+	add	(RP), U0
+	adc	8(RP), U1
+	adc	16(RP), U2
+	adc	24(RP), %rax
+	adc	$0, %rdx
+C Split the sum at bit 255: %rdx becomes sum / 2^255, %rax keeps bits 192..254.
+	add	%rax, %rax	C Copy high bit to carry
+	adc	%rdx, %rdx	C %rdx = 2 * %rdx + carry
+	shr	%rax		C Undo shift, clear high bit
+	imul	M, %rdx		C %rdx = 19 * (sum / 2^255)
+C Add that at the bottom and store; bit 63 of %rax is clear, so the last adc cannot carry out.
+	add	%rdx, U0
+	mov	U0, (RP)	C The mov stores leave the carry flag intact for the adc chain
+	adc	$0, U1
+	mov	U1, 8(RP)
+	adc	$0, U2
+	mov	U2, 16(RP)
+	adc	$0, %rax
+	mov	%rax, 24(RP)
+
+	pop	%r12
+	W64_EXIT(2, 0)
+	ret
+EPILOGUE(nettle_ecc_25519_modp)
-- 
2.47.2