From: Niels Möller Date: Fri, 12 Apr 2013 10:22:56 +0000 (+0200) Subject: ARM assembly for umac_nh. X-Git-Tag: nettle_2.7_release_20130424~55 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3be646d1cdbffbceef956de554a005320a1aa63d;p=thirdparty%2Fnettle.git ARM assembly for umac_nh. --- diff --git a/ChangeLog b/ChangeLog index 86f75f11..7a163fe7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2013-04-12 Niels Möller + * armv7/umac-nh.asm: New file. 2.1 time speedup. + * armv7/machine.m4 (D0REG, D1REG): New macros. + * configure.ac (asm_replace_list): Added umac-nh.asm and umac-nh-n.asm. diff --git a/armv7/machine.m4 b/armv7/machine.m4 index a829c76e..f982a66a 100644 --- a/armv7/machine.m4 +++ b/armv7/machine.m4 @@ -16,3 +16,41 @@ define(, )>)dnl + +define(, )>)dnl + +define(, )>)dnl diff --git a/armv7/umac-nh.asm b/armv7/umac-nh.asm new file mode 100644 index 00000000..7c0a0290 --- /dev/null +++ b/armv7/umac-nh.asm @@ -0,0 +1,91 @@ +C nettle, low-level cryptographics library +C +C Copyright (C) 2013 Niels Möller +C +C The nettle library is free software; you can redistribute it and/or modify +C it under the terms of the GNU Lesser General Public License as published by +C the Free Software Foundation; either version 2.1 of the License, or (at your +C option) any later version. +C +C The nettle library is distributed in the hope that it will be useful, but +C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +C License for more details. +C +C You should have received a copy of the GNU Lesser General Public License +C along with the nettle library; see the file COPYING.LIB. If not, write to +C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +C MA 02111-1301, USA. 
+
+        .file "umac-nh.asm"
+        .fpu neon
+
+C Register aliases.
+C NOTE(review): in this copy every define() below has lost both its macro
+C name and its register operand, so the register assignment cannot be read
+C from here. The code uses four names as core-register operands (KEY,
+C LENGTH, MSG, SHIFT) and ten as NEON operands (QA, QB, DM, QLEFT, QRIGHT,
+C QT0, QT1, QK0, QK1, QACC); presumably these correspond to the groups of
+C four and ten defines. Restore them from the upstream file.
+C NOTE(review): nothing is saved or restored in this function, so the
+C aliases presumably map only to caller-saved registers, and DM presumably
+C does not overlap any of the Q aliases. Verify once the defines are
+C restored.
+define(, )
+define(, )
+define(, )
+define(, )
+
+define(, )
+define(, )
+define(, )
+define(, )
+define(, )
+define(, )
+define(, )
+define(, )
+define(, )
+define(, )
+
+        .text
+        .align 3
+
+C _nettle_umac_nh
+C
+C Inputs:  KEY    pointer to the key; 32 bytes are read per iteration.
+C          LENGTH byte count; decremented by 32 per iteration.
+C          MSG    pointer to the message; any alignment.
+C Output:  64-bit sum in r0 (low word) and r1 (high word).
+C
+C Each iteration takes 32 message bytes and 32 key bytes, adds them as
+C 32-bit lanes, multiplies each lane of the first 16 bytes with the
+C corresponding lane of the second 16 bytes into a 64-bit product, and
+C adds the four products to a two-lane 64-bit accumulator. The two lanes
+C are summed at the end.
+C
+C The loop body runs before LENGTH is tested, so at least 32 bytes are
+C always processed. Presumably LENGTH is a nonzero multiple of 32; TODO
+C confirm against the callers.
+C
+C All message loads are 64-bit aligned. For an unaligned MSG the loads
+C cover whole aligned 8-byte words, so the bytes of the first word that
+C precede MSG are read as well, and the final word is read in full.
+PROLOGUE(_nettle_umac_nh)
+        C Setup for 64-bit aligned reads
+        C SHIFT = MSG mod 8. The flags set by ands are still valid at the
+        C addne/addeq below, since the and and vld1 between do not set
+        C flags.
+        ands SHIFT, MSG, #7
+        and MSG, MSG, #-8
+        vld1.8 {DM}, [MSG :64]
+        C Unaligned: the first word is consumed through DM, so step past it.
+        addne MSG, MSG, #8
+        C Aligned: use 8 instead of 0. This gives a right shift of 64 (DM
+        C and all carries become zero) and a left shift of 0; MSG is not
+        C advanced, so the loop loads the first word again.
+        addeq SHIFT, SHIFT, #8
+
+        C FIXME: Combine as rsb ?
+        C SHIFT = -8 * SHIFT, a negative bit count; vshl with a negative
+        C count shifts right.
+        lsl SHIFT, SHIFT, #3
+        neg SHIFT, SHIFT
+
+        C Right shift in QRIGHT (both halves)
+        C Only 32-bit lane 0 of the low half is written before the half is
+        C copied; vshl takes the count from the low byte of each 64-bit
+        C element, so the remaining bits do not matter.
+        vmov.i32 D0REG(QRIGHT)[0], SHIFT
+        vmov.32 D1REG(QRIGHT), D0REG(QRIGHT)
+        C Left shift count = 64 - 8 * (MSG mod 8), or 0 when aligned.
+        add SHIFT, SHIFT, #64
+
+        vmov.i32 D0REG(QLEFT)[0], SHIFT
+        vmov.32 D1REG(QLEFT), D0REG(QLEFT)
+
+        C Clear the accumulator.
+        vmov.i64 QACC, #0
+
+        C Discard the bytes of the first word that precede MSG. DM now
+        C holds the carry into the first word loaded by the loop.
+        vshl.u64 DM, DM, D0REG(QRIGHT)
+.Loop:
+        C Set m[i] <-- m[i-1] >> RSHIFT + m[i] << LSHIFT
+        C QT0/QT1 receive the four loaded 64-bit words shifted right; each
+        C of them is the carry into the following word. QA/QB are shifted
+        C left and combined with the carry from the preceding word. The
+        C two parts occupy disjoint bits, so veor acts as the "+" above.
+        C The carry out of the last word is kept in DM for the next
+        C iteration.
+        vld1.8 {QA, QB}, [MSG :64]!
+        vshl.u64 QT0, QA, QRIGHT
+        vshl.u64 QT1, QB, QRIGHT
+        vshl.u64 QA, QA, QLEFT
+        vshl.u64 QB, QB, QLEFT
+        veor D0REG(QA), D0REG(QA), DM
+        veor D1REG(QA), D1REG(QA), D0REG(QT0)
+        veor D0REG(QB), D0REG(QB), D1REG(QT0)
+        veor D1REG(QB), D1REG(QB), D0REG(QT1)
+        vmov DM, D1REG(QT1)
+
+        C Add 32 bytes of key to the realigned message as 32-bit lanes,
+        C then multiply the lanes of QA by the corresponding lanes of QB,
+        C giving four 64-bit products in QT0 and QT1.
+        C subs sets the flags tested by bhi; the vector instructions in
+        C between leave them unchanged.
+        vld1.i32 {QK0, QK1}, [KEY]!
+        vadd.i32 QA, QA, QK0
+        vadd.i32 QB, QB, QK1
+        vmull.u32 QT0, D0REG(QA), D0REG(QB)
+        vmull.u32 QT1, D1REG(QA), D1REG(QB)
+        subs LENGTH, LENGTH, #32
+        vadd.i64 QACC, QACC, QT0
+        vadd.i64 QACC, QACC, QT1
+        C Continue while the subtraction did not borrow and left a nonzero
+        C count, i.e. while more than 32 bytes remained before it.
+        bhi .Loop
+
+        C Sum the two accumulator lanes and return the result in r0 (low
+        C word) and r1 (high word).
+        vadd.i64 D0REG(QACC), D0REG(QACC), D1REG(QACC)
+        vmov r0, r1, D0REG(QACC)
+        bx lr
+EPILOGUE(_nettle_umac_nh)