From: Niels Möller
Date: Mon, 11 Mar 2013 09:21:12 +0000 (+0100)
Subject: ARM assembly for sha1.
X-Git-Tag: nettle_2.7_release_20130424~106
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9e2ce735038c6070941731ce60137606075f5e3f;p=thirdparty%2Fnettle.git

ARM assembly for sha1.
---

diff --git a/ChangeLog b/ChangeLog
index bfaaa70c..e98ba45c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
 2013-03-11  Niels Möller
 
+	* armv7/sha1-compress.asm: New file, 9% speedup.
+
 	* testsuite/testutils.c (test_hash): Test different alignments
 	for the hash input.
 
diff --git a/armv7/sha1-compress.asm b/armv7/sha1-compress.asm
new file mode 100644
index 00000000..69c30e42
--- /dev/null
+++ b/armv7/sha1-compress.asm
@@ -0,0 +1,234 @@
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2013 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB.  If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02111-1301, USA.
+
+	.file "sha1-compress.asm"
+
+define(<STATE>, <r0>)
+define(<INPUT>, <r1>)
+define(<SA>, <r2>)
+define(<SB>, <r3>)
+define(<SC>, <r4>)
+define(<SD>, <r5>)
+define(<SE>, <r6>)
+define(<T0>, <r7>)
+define(<SHIFT>, <r8>)
+define(<WPREV>, <r10>)
+define(<W>, <r12>)
+define(<K>, <lr>)
+
+C FIXME: Could avoid a mov with even and odd variants.
+define(<LOAD>, <
+	ldr	T0, [INPUT], #+4
+	sel	W, WPREV, T0
+	ror	W, W, SHIFT
+	mov	WPREV, T0
+	rev	W, W
+	str	W, [SP,#eval(4*$1)]
+>)
+define(<EXPN>, <
+	ldr	W, [sp, #+eval(4*$1)]
+	ldr	T0, [sp, #+eval(4*(($1 + 2) % 16))]
+	eor	W, W, T0
+	ldr	T0, [sp, #+eval(4*(($1 + 8) % 16))]
+	eor	W, W, T0
+	ldr	T0, [sp, #+eval(4*(($1 + 13) % 16))]
+	eor	W, W, T0
+	ror	W, W, #31
+	str	W, [sp, #+eval(4*$1)]
+>)
+
+C F1(B,C,D) = D^(B&(C^D))
+C ROUND1(A,B,C,D,E)
+define(<ROUND1>, <
+	eor	T0, $3, $4
+	add	$5, $5, K
+	and	T0, T0, $2
+	add	$5, $5, $1, ror #27
+	eor	T0, T0, $4
+	add	$5, $5, W
+	ror	$2, $2, #2
+	add	$5, $5, T0
+>)
+C F2(B,C,D) = B^C^D
+define(<ROUND2>, <
+	eor	T0, $2, $4
+	add	$5, $5, K
+	eor	T0, T0, $3
+	add	$5, $5, $1, ror #27
+	add	$5, $5, W
+	ror	$2, $2, #2
+	add	$5, $5, T0
+>)
+C F3(B,C,D) = (B&C) | (D & (B|C)) = (B & (C ^ D)) + (C & D)
+define(<ROUND3>, <
+	eor	T0, $3, $4
+	add	$5, $5, K
+	and	T0, T0, $2
+	add	$5, $5, $1, ror #27
+	add	$5, $5, T0
+	add	$5, $5, W
+	and	T0, $3, $4
+	ror	$2, $2, #2
+	add	$5, $5, T0
+>)
+	C void _nettle_sha1_compress(uint32_t *state, const uint8_t *input)
+
+	.text
+	.align 2
+.LK1:
+	.int	0x5A827999
+.LK2:
+	.int	0x6ED9EBA1
+.LK3:
+	.int	0x8F1BBCDC
+
+PROLOGUE(_nettle_sha1_compress)
+	push	{r4,r5,r6,r7,r8,r10,lr}
+	sub	sp, sp, #64
+
+	C Sets SHIFT to 8*low bits of input pointer. Sets up GE flags
+	C as follows, corresponding to bytes to be used from WPREV
+	C	SHIFT	0	8	16	24
+	C CPSR.GE	0000	1110	1100	1000
+	ands	SHIFT, INPUT, #3
+	and	INPUT, INPUT, $-4
+	ldr	WPREV, [INPUT]
+	addne	INPUT, INPUT, #4	C Unaligned input
+	lsl	SHIFT, SHIFT, #3
+	mov	T0, #0
+	movne	T0, #-1
+	lsl	W, T0, SHIFT
+	uadd8	T0, T0, W	C Sets APSR.GE bits
+
+	ldr	K, .LK1
+	ldm	STATE, {SA,SB,SC,SD,SE}
+
+	LOAD( 0) ROUND1(SA, SB, SC, SD, SE)
+	LOAD( 1) ROUND1(SE, SA, SB, SC, SD)
+	LOAD( 2) ROUND1(SD, SE, SA, SB, SC)
+	LOAD( 3) ROUND1(SC, SD, SE, SA, SB)
+	LOAD( 4) ROUND1(SB, SC, SD, SE, SA)
+
+	LOAD( 5) ROUND1(SA, SB, SC, SD, SE)
+	LOAD( 6) ROUND1(SE, SA, SB, SC, SD)
+	LOAD( 7) ROUND1(SD, SE, SA, SB, SC)
+	LOAD( 8) ROUND1(SC, SD, SE, SA, SB)
+	LOAD( 9) ROUND1(SB, SC, SD, SE, SA)
+
+	LOAD(10) ROUND1(SA, SB, SC, SD, SE)
+	LOAD(11) ROUND1(SE, SA, SB, SC, SD)
+	LOAD(12) ROUND1(SD, SE, SA, SB, SC)
+	LOAD(13) ROUND1(SC, SD, SE, SA, SB)
+	LOAD(14) ROUND1(SB, SC, SD, SE, SA)
+
+	LOAD(15) ROUND1(SA, SB, SC, SD, SE)
+	EXPN( 0) ROUND1(SE, SA, SB, SC, SD)
+	EXPN( 1) ROUND1(SD, SE, SA, SB, SC)
+	EXPN( 2) ROUND1(SC, SD, SE, SA, SB)
+	EXPN( 3) ROUND1(SB, SC, SD, SE, SA)
+
+	ldr	K, .LK2
+	EXPN( 4) ROUND2(SA, SB, SC, SD, SE)
+	EXPN( 5) ROUND2(SE, SA, SB, SC, SD)
+	EXPN( 6) ROUND2(SD, SE, SA, SB, SC)
+	EXPN( 7) ROUND2(SC, SD, SE, SA, SB)
+	EXPN( 8) ROUND2(SB, SC, SD, SE, SA)
+
+	EXPN( 9) ROUND2(SA, SB, SC, SD, SE)
+	EXPN(10) ROUND2(SE, SA, SB, SC, SD)
+	EXPN(11) ROUND2(SD, SE, SA, SB, SC)
+	EXPN(12) ROUND2(SC, SD, SE, SA, SB)
+	EXPN(13) ROUND2(SB, SC, SD, SE, SA)
+
+	EXPN(14) ROUND2(SA, SB, SC, SD, SE)
+	EXPN(15) ROUND2(SE, SA, SB, SC, SD)
+	EXPN( 0) ROUND2(SD, SE, SA, SB, SC)
+	EXPN( 1) ROUND2(SC, SD, SE, SA, SB)
+	EXPN( 2) ROUND2(SB, SC, SD, SE, SA)
+
+	EXPN( 3) ROUND2(SA, SB, SC, SD, SE)
+	EXPN( 4) ROUND2(SE, SA, SB, SC, SD)
+	EXPN( 5) ROUND2(SD, SE, SA, SB, SC)
+	EXPN( 6) ROUND2(SC, SD, SE, SA, SB)
+	EXPN( 7) ROUND2(SB, SC, SD, SE, SA)
+
+	ldr	K, .LK3
+	EXPN( 8) ROUND3(SA, SB, SC, SD, SE)
+	EXPN( 9) ROUND3(SE, SA, SB, SC, SD)
+	EXPN(10) ROUND3(SD, SE, SA, SB, SC)
+	EXPN(11) ROUND3(SC, SD, SE, SA, SB)
+	EXPN(12) ROUND3(SB, SC, SD, SE, SA)
+
+	EXPN(13) ROUND3(SA, SB, SC, SD, SE)
+	EXPN(14) ROUND3(SE, SA, SB, SC, SD)
+	EXPN(15) ROUND3(SD, SE, SA, SB, SC)
+	EXPN( 0) ROUND3(SC, SD, SE, SA, SB)
+	EXPN( 1) ROUND3(SB, SC, SD, SE, SA)
+
+	EXPN( 2) ROUND3(SA, SB, SC, SD, SE)
+	EXPN( 3) ROUND3(SE, SA, SB, SC, SD)
+	EXPN( 4) ROUND3(SD, SE, SA, SB, SC)
+	EXPN( 5) ROUND3(SC, SD, SE, SA, SB)
+	EXPN( 6) ROUND3(SB, SC, SD, SE, SA)
+
+	EXPN( 7) ROUND3(SA, SB, SC, SD, SE)
+	EXPN( 8) ROUND3(SE, SA, SB, SC, SD)
+	EXPN( 9) ROUND3(SD, SE, SA, SB, SC)
+	EXPN(10) ROUND3(SC, SD, SE, SA, SB)
+	EXPN(11) ROUND3(SB, SC, SD, SE, SA)
+
+	ldr	K, .LK4
+	EXPN(12) ROUND2(SA, SB, SC, SD, SE)
+	EXPN(13) ROUND2(SE, SA, SB, SC, SD)
+	EXPN(14) ROUND2(SD, SE, SA, SB, SC)
+	EXPN(15) ROUND2(SC, SD, SE, SA, SB)
+	EXPN( 0) ROUND2(SB, SC, SD, SE, SA)
+
+	EXPN( 1) ROUND2(SA, SB, SC, SD, SE)
+	EXPN( 2) ROUND2(SE, SA, SB, SC, SD)
+	EXPN( 3) ROUND2(SD, SE, SA, SB, SC)
+	EXPN( 4) ROUND2(SC, SD, SE, SA, SB)
+	EXPN( 5) ROUND2(SB, SC, SD, SE, SA)
+
+	EXPN( 6) ROUND2(SA, SB, SC, SD, SE)
+	EXPN( 7) ROUND2(SE, SA, SB, SC, SD)
+	EXPN( 8) ROUND2(SD, SE, SA, SB, SC)
+	EXPN( 9) ROUND2(SC, SD, SE, SA, SB)
+	EXPN(10) ROUND2(SB, SC, SD, SE, SA)
+
+	EXPN(11) ROUND2(SA, SB, SC, SD, SE)
+	EXPN(12) ROUND2(SE, SA, SB, SC, SD)
+	EXPN(13) ROUND2(SD, SE, SA, SB, SC)
+	EXPN(14) ROUND2(SC, SD, SE, SA, SB)
+	EXPN(15) ROUND2(SB, SC, SD, SE, SA)
+
+	C Use registers we no longer need.
+	ldm	STATE, {INPUT,T0,SHIFT,W,K}
+	add	SA, SA, INPUT
+	add	SB, SB, T0
+	add	SC, SC, SHIFT
+	add	SD, SD, W
+	add	SE, SE, K
+	add	sp, sp, #64
+	stm	STATE, {SA,SB,SC,SD,SE}
+	pop	{r4,r5,r6,r7,r8,r10,pc}
+EPILOGUE(_nettle_sha1_compress)
+
+.LK4:
+	.int	0xCA62C1D6
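
As a reading aid, here is the round structure the assembly implements, as a
minimal portable C sketch. This is not code from the commit or from nettle's
C implementation; the name sha1_compress_sketch and the ROTL32/F1/F2/F3
macros are illustrative. The F definitions, the K constants, and the
+2/+8/+13 (mod 16) expansion offsets are taken directly from the comments
and the EXPN macro above.

#include <stdint.h>

#define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

/* The three boolean functions, as given in the assembly comments. */
#define F1(b, c, d) ((d) ^ ((b) & ((c) ^ (d))))          /* rounds  0-19 */
#define F2(b, c, d) ((b) ^ (c) ^ (d))                    /* 20-39, 60-79 */
#define F3(b, c, d) (((b) & ((c) ^ (d))) + ((c) & (d)))  /* rounds 40-59 */

static void
sha1_compress_sketch(uint32_t *state, const uint8_t *input)
{
  /* 16-word circular schedule, like the 64-byte stack buffer the
     assembly expands in place with EXPN. */
  uint32_t w[16];
  uint32_t a = state[0], b = state[1], c = state[2];
  uint32_t d = state[3], e = state[4];
  unsigned i;

  /* Big-endian load; in LOAD this is the rev instruction. */
  for (i = 0; i < 16; i++)
    w[i] = ((uint32_t) input[4*i] << 24) | ((uint32_t) input[4*i+1] << 16)
      | ((uint32_t) input[4*i+2] << 8) | (uint32_t) input[4*i+3];

  for (i = 0; i < 80; i++)
    {
      uint32_t f, k, t;
      if (i >= 16)
        {
          /* EXPN($1): xor in the words at offsets +2, +8, +13 mod 16,
             then rotate left by one (the ror #31 in the assembly). */
          t = w[i % 16] ^ w[(i+2) % 16] ^ w[(i+8) % 16] ^ w[(i+13) % 16];
          w[i % 16] = ROTL32(t, 1);
        }
      if (i < 20)      { f = F1(b, c, d); k = 0x5A827999; } /* .LK1 */
      else if (i < 40) { f = F2(b, c, d); k = 0x6ED9EBA1; } /* .LK2 */
      else if (i < 60) { f = F3(b, c, d); k = 0x8F1BBCDC; } /* .LK3 */
      else             { f = F2(b, c, d); k = 0xCA62C1D6; } /* .LK4 */

      /* One ROUNDn: E += rotl(A,5) + f(B,C,D) + K + W; B = rotl(B,30).
         Note "A, ror #27" is rotl(A,5) and "ror #2" is rotl(B,30). */
      t = ROTL32(a, 5) + f + e + k + w[i % 16];
      e = d; d = c; c = ROTL32(b, 30); b = a; a = t;
      /* This five-way shuffle is what the assembly avoids by rotating
         the roles of SA..SE between consecutive ROUND macro calls. */
    }

  state[0] += a; state[1] += b; state[2] += c;
  state[3] += d; state[4] += e;
}

The fourth group reuses ROUND2 because SHA-1's round-4 function equals its
round-2 function (B^C^D); only the constant, loaded from .LK4, changes.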
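
The unaligned-input handling is the one non-obvious trick. The prologue
computes the per-byte GE flag pattern once with uadd8, and each LOAD then
merges the previous and next aligned words with sel followed by a ror by
SHIFT, so every word of input costs a single aligned ldr regardless of the
pointer's alignment. A rough C equivalent of one such fetch is sketched
below, assuming little-endian byte order as in the assembly;
fetch_unaligned_word is an illustrative name, not a nettle function.

#include <stdint.h>

/* Fetch the next 32 input bits using only aligned word loads, given the
   previously fetched aligned word *wprev, the next aligned word w, and
   shift = 8 * (input_pointer & 3). */
static uint32_t
fetch_unaligned_word(uint32_t *wprev, uint32_t w, unsigned shift)
{
  uint32_t prev = *wprev;
  *wprev = w;               /* mov WPREV, T0 */
  if (shift == 0)
    return w;               /* GE = 0000: sel takes every byte from w */
  /* sel keeps the still-unused high bytes of the previous word and the
     low bytes of the new one; the ror by SHIFT then moves them into
     stream order.  In C the byte shuffle is a pair of shifts. */
  return (prev >> shift) | (w << (32 - shift));
}

This matches the GE table in the code: for SHIFT = 8, the mask T0 = -1
shifted left by 8 makes uadd8 carry out of byte lanes 1-3, giving GE = 1110,
so sel keeps three bytes of WPREV and one of the new word. The flag setup
is paid once per compression call rather than once per word.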