]>
Commit | Line | Data |
---|---|---|
7c6ef2f2 NP |
1 | /* |
2 | * SHA transform optimized for ARM | |
3 | * | |
4 | * Copyright: (C) 2005 by Nicolas Pitre <nico@cam.org> | |
5 | * Created: September 17, 2005 | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | */ | |
11 | ||
12 | .text | |
13 | .globl sha_transform | |
14 | ||
15 | /* | |
16 | * void sha_transform(uint32_t *hash, const unsigned char *data, uint32_t *W); | |
17 | * r0 = hash (5 words, read and updated in place), r1 = data (64 bytes), r2 = W (80-word scratch, entirely overwritten) | |
18 | * note: the "data" pointer may be unaligned (it is only read bytewise or through memcpy). | |
19 | */ | |
20 | ||
21 | sha_transform: | |
22 | ||
23 | stmfd sp!, {r4 - r8, lr} /* save the work registers; lr is reused below as a loop counter */ | |
24 | ||
25 | @ for (i = 0; i < 16; i++) | |
26 | @ W[i] = ntohl(((uint32_t *)data)[i]); */ | |
27 | ||
28 | #ifdef __ARMEB__ /* big-endian ARM build: ntohl() is the identity there, so a straight 64-byte copy fills W[0..15] */ | |
29 | mov r4, r0 /* keep hash in r4 across the call */ | |
30 | mov r0, r2 /* memcpy destination = W; the source is already data in r1 */ | |
31 | mov r2, #64 /* 64 bytes = 16 words */ | |
32 | bl memcpy | |
33 | mov r2, r0 /* r2 = W again, taken from memcpy's return value (the destination) */ | |
34 | mov r0, r4 /* r0 = hash again */ | |
35 | #else | |
36 | mov r3, r2 /* r3 = write cursor into W; r2 keeps the base for the later stages */ | |
37 | mov lr, #16 /* 16 words to assemble */ | |
38 | 1: ldrb r4, [r1], #1 /* four post-incremented byte loads; the first byte is the most significant */ | |
39 | ldrb r5, [r1], #1 | |
40 | ldrb r6, [r1], #1 | |
41 | ldrb r7, [r1], #1 | |
42 | subs lr, lr, #1 /* sets Z for the bne below; the orr/str in between do not touch the flags */ | |
43 | orr r5, r5, r4, lsl #8 | |
44 | orr r6, r6, r5, lsl #8 | |
45 | orr r7, r7, r6, lsl #8 /* r7 = b0<<24 | b1<<16 | b2<<8 | b3 */ | |
46 | str r7, [r3], #4 /* W[i] = r7, advance the cursor */ | |
47 | bne 1b | |
48 | #endif | |
49 | ||
50 | @ for (i = 0; i < 64; i++) | |
51 | @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31); | |
52 | ||
53 | sub r3, r2, #4 /* bias by one word: the pre-indexed load below advances r3 before reading */ | |
54 | mov lr, #64 /* 64 words to derive: W[16..79] */ | |
55 | 2: ldr r4, [r3, #4]! /* r3 += 4, then r4 = W[i] */ | |
56 | subs lr, lr, #1 /* flags stay valid until the bne: nothing below sets them */ | |
57 | ldr r5, [r3, #8] /* W[i+2] */ | |
58 | ldr r6, [r3, #32] /* W[i+8] */ | |
59 | ldr r7, [r3, #52] /* W[i+13] */ | |
60 | eor r4, r4, r5 | |
61 | eor r4, r4, r6 | |
62 | eor r4, r4, r7 | |
63 | mov r4, r4, ror #31 /* ror by 31 is a rotate left by 1 */ | |
64 | str r4, [r3, #64] /* W[i+16] */ | |
65 | bne 2b | |
66 | ||
67 | /* | |
68 | * The SHA functions are: | |
69 | * | |
70 | * f1(B,C,D) = (D ^ (B & (C ^ D))) | |
71 | * f2(B,C,D) = (B ^ C ^ D) | |
72 | * f3(B,C,D) = ((B & C) | (D & (B | C))) | |
73 | * | |
74 | * Then the sub-blocks are processed as follows: | |
75 | * | |
76 | * A' = ror(A, 27) + f(B,C,D) + E + K + *W++ | |
77 | * B' = A | |
78 | * C' = ror(B, 2) | |
79 | * D' = C | |
80 | * E' = D | |
81 | * | |
82 | * We therefore unroll each loop 5 times to avoid register shuffling. | |
83 | * Also the ror for C (and also D and E which are successively derived | |
84 | * from it) is applied in place to cut on an additional mov insn for | |
85 | * each round. | |
86 | */ | |
87 | ||
88 | .macro sha_f1, A, B, C, D, E /* one round using f1; the result replaces E. Expects r1 = K and r2 = W cursor; clobbers r3 and ip */ | |
89 | ldr r3, [r2], #4 /* r3 = *W++ */ | |
90 | eor ip, \C, \D /* XOR of the stored (not yet rotated) C and D */ | |
91 | add \E, r1, \E, ror #2 /* E = K + ror(E, 2): the deferred rotation is applied on read */ | |
92 | and ip, \B, ip, ror #2 /* B & (C ^ D), rotating the XOR instead of each operand */ | |
93 | add \E, \E, \A, ror #27 /* + ror(A, 27) */ | |
94 | eor ip, ip, \D, ror #2 /* ip = D ^ (B & (C ^ D)) = f1 */ | |
95 | add \E, \E, r3 /* + W word */ | |
96 | add \E, \E, ip /* + f1 */ | |
97 | .endm | |
98 | ||
99 | .macro sha_f2, A, B, C, D, E /* one round using f2; the result replaces E. Expects r1 = K and r2 = W cursor; clobbers r3 and ip */ | |
100 | ldr r3, [r2], #4 /* r3 = *W++ */ | |
101 | add \E, r1, \E, ror #2 /* E = K + ror(E, 2): the deferred rotation is applied on read */ | |
102 | eor ip, \B, \C, ror #2 /* B ^ C */ | |
103 | add \E, \E, \A, ror #27 /* + ror(A, 27) */ | |
104 | eor ip, ip, \D, ror #2 /* ip = B ^ C ^ D = f2 */ | |
105 | add \E, \E, r3 /* + W word */ | |
106 | add \E, \E, ip /* + f2 */ | |
107 | .endm | |
108 | ||
109 | .macro sha_f3, A, B, C, D, E /* one round using f3; the result replaces E. Expects r1 = K and r2 = W cursor; clobbers r3 and ip */ | |
110 | ldr r3, [r2], #4 /* r3 = *W++ */ | |
111 | add \E, r1, \E, ror #2 /* E = K + ror(E, 2): the deferred rotation is applied on read */ | |
112 | orr ip, \B, \C, ror #2 /* B | C */ | |
113 | add \E, \E, \A, ror #27 /* + ror(A, 27) */ | |
114 | and ip, ip, \D, ror #2 /* D & (B | C) */ | |
115 | add \E, \E, r3 /* + W word; r3 is free to be reused as a temporary from here on */ | |
116 | and r3, \B, \C, ror #2 /* B & C */ | |
117 | orr ip, ip, r3 /* ip = (B & C) | (D & (B | C)) = f3 */ | |
118 | add \E, \E, ip /* + f3 */ | |
119 | .endm | |
120 | ||
121 | ldmia r0, {r4 - r8} /* A, B, C, D, E = hash[0..4] */ | |
122 | ||
123 | mov lr, #4 /* 4 passes of 5 unrolled rounds = 20 rounds */ | |
124 | ldr r1, .L_sha_K + 0 /* K for the f1 rounds */ | |
125 | ||
126 | /* adjust initial values: pre-rotate C, D and E left by 2 (ror #30) so that the "ror #2" the macros apply on every read yields the real values */ | |
127 | mov r6, r6, ror #30 | |
128 | mov r7, r7, ror #30 | |
129 | mov r8, r8, ror #30 | |
130 | ||
131 | 3: subs lr, lr, #1 /* the flags survive until the bne: the macros contain no flag-setting instruction */ | |
132 | sha_f1 r4, r5, r6, r7, r8 /* rounds 0-19: each call writes its result into the register passed as E, */ | |
133 | sha_f1 r8, r4, r5, r6, r7 /* which is then passed as A to the next call; after five calls every */ | |
134 | sha_f1 r7, r8, r4, r5, r6 /* register is back in its starting role */ | |
135 | sha_f1 r6, r7, r8, r4, r5 | |
136 | sha_f1 r5, r6, r7, r8, r4 | |
137 | bne 3b | |
138 | ||
139 | ldr r1, .L_sha_K + 4 /* K for rounds 20-39 */ | |
140 | mov lr, #4 | |
141 | ||
142 | 4: subs lr, lr, #1 | |
143 | sha_f2 r4, r5, r6, r7, r8 /* rounds 20-39 use f2 */ | |
144 | sha_f2 r8, r4, r5, r6, r7 | |
145 | sha_f2 r7, r8, r4, r5, r6 | |
146 | sha_f2 r6, r7, r8, r4, r5 | |
147 | sha_f2 r5, r6, r7, r8, r4 | |
148 | bne 4b | |
149 | ||
150 | ldr r1, .L_sha_K + 8 /* K for rounds 40-59 */ | |
151 | mov lr, #4 | |
152 | ||
153 | 5: subs lr, lr, #1 | |
154 | sha_f3 r4, r5, r6, r7, r8 /* rounds 40-59 use f3 */ | |
155 | sha_f3 r8, r4, r5, r6, r7 | |
156 | sha_f3 r7, r8, r4, r5, r6 | |
157 | sha_f3 r6, r7, r8, r4, r5 | |
158 | sha_f3 r5, r6, r7, r8, r4 | |
159 | bne 5b | |
160 | ||
161 | ldr r1, .L_sha_K + 12 /* K for rounds 60-79 */ | |
162 | mov lr, #4 | |
163 | ||
164 | 6: subs lr, lr, #1 | |
165 | sha_f2 r4, r5, r6, r7, r8 /* rounds 60-79 use f2 again, with the fourth constant */ | |
166 | sha_f2 r8, r4, r5, r6, r7 | |
167 | sha_f2 r7, r8, r4, r5, r6 | |
168 | sha_f2 r6, r7, r8, r4, r5 | |
169 | sha_f2 r5, r6, r7, r8, r4 | |
170 | bne 6b | |
171 | ||
172 | ldmia r0, {r1, r2, r3, ip, lr} /* reload the five input hash words into the now-free scratch registers */ | |
173 | add r4, r1, r4 | |
174 | add r5, r2, r5 | |
175 | add r6, r3, r6, ror #2 /* C, D and E still carry their deferred rotation; apply it while adding */ | |
176 | add r7, ip, r7, ror #2 | |
177 | add r8, lr, r8, ror #2 | |
178 | stmia r0, {r4 - r8} /* hash[0..4] = old hash + A..E */ | |
179 | ||
180 | ldmfd sp!, {r4 - r8, pc} /* restore the work registers and return by popping the saved lr into pc */ | |
181 | ||
182 | .L_sha_K: /* one additive constant K per 20-round phase, stored in the order the phases load them (byte offsets 0, 4, 8, 12) */ | |
183 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 | |
184 |