]>
Commit | Line | Data |
---|---|---|
6aa36e8e RS |
1 | #! /usr/bin/env perl |
2 | # Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
4 | # Licensed under the OpenSSL license (the "License"). You may not use | |
5 | # this file except in compliance with the License. You can obtain a copy | |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | ||
c6b77c16 AP |
9 | |
10 | # ==================================================================== | |
11 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |
12 | # project. The module is, however, dual licensed under OpenSSL and | |
13 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
14 | # details see http://www.openssl.org/~appro/cryptogams/. | |
15 | # ==================================================================== | |
16 | ||
17 | # Poly1305 hash for MIPS64. | |
18 | # | |
19 | # May 2016 | |
20 | # | |
21 | # Numbers are cycles per processed byte with poly1305_blocks alone. | |
22 | # | |
23 | # IALU/gcc | |
24 | # R1x000 5.64/+120% (big-endian) | |
25 | # Octeon II 3.80/+280% (little-endian) | |
26 | ||
27 | ###################################################################### | |
28 | # There are a number of MIPS ABIs in use; O32 and N32/64 are the most | |
29 | # widely used. Then there is a new contender: NUBI. It appears that if | |
30 | # one picks the latter, it's possible to arrange code in an ABI-neutral | |
31 | # manner. Therefore let's stick to the NUBI register layout: | |
32 | # | |
# NUBI register map: every name below holds the "$<number>" operand string
# that is later interpolated into the generated assembly.
33 | ($zero,$at,$t0,$t1,$t2) = map { '$' . $_ } (0..2,24,25); | |
34 | ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) = map { '$' . $_ } (4..11); | |
35 | ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) = map { '$' . $_ } (12..23); | |
36 | ($gp,$tp,$sp,$fp,$ra) = map { '$' . $_ } (3,28..31); | |
37 | # | |
38 | # The return value is placed in $a0. The following coding rules facilitate | |
39 | # interoperability: | |
40 | # | |
41 | # - never ever touch $tp, "thread pointer", former $gp [o32 can be | |
42 | # excluded from the rule, because it's specified volatile]; | |
43 | # - copy return value to $t0, former $v0 [or to $a0 if you're adapting | |
44 | # old code]; | |
45 | # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; | |
46 | # | |
47 | # For reference here is register layout for N32/64 MIPS ABIs: | |
48 | # | |
49 | # ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); | |
50 | # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); | |
51 | # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); | |
52 | # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); | |
53 | # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); | |
54 | # | |
55 | # <appro@openssl.org> | |
56 | # | |
57 | ###################################################################### | |
58 | ||
# The target ABI flavour is the first command-line argument. This module can
# only be generated for flavours accepted by the check below (n32, 64,
# nubi64), so default to "64" instead of a flavour that check always rejects.
59 | $flavour = shift || "64"; # supported flavours are n32,64,nubi64 | |
60 | ||
61 | die "MIPS64 only" unless ($flavour =~ /64|n32/i); | |
62 | ||
# Register that receives the return value, and the .mask bitmap describing
# the registers poly1305_blocks_internal spills: bits 12-17 for NUBI,
# bits 16-17 otherwise.
63 | $v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; | |
64 | $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000"; | |
65 | ||
# Argument registers (context, input pointer, length, pad bit) and the
# scratch registers used by the generated routines.
66 | ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); | |
67 | ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); | |
68 | ||
# Assembly preamble (MSB/LSB byte offsets for the unaligned loads) followed by
# poly1305_init(ctx, key). The emitted routine zeroes the three 64-bit hash
# words at ctx+0/8/16. When the key pointer is non-zero it loads 16 key bytes
# as two 64-bit words (plain ld on MIPS64R6, ldl/ldr pairs otherwise, with a
# byte swap on big-endian builds), masks them with 0x0ffffffc0fffffff and
# 0x0ffffffc0ffffffc, and stores r0 at ctx+24, r1 at ctx+32 and
# s1 = r1 + (r1 >> 2) at ctx+40. The routine always returns 0.
69 | $code.=<<___; |
947716c1 AP |
70 | #include "mips_arch.h" |
71 | ||
c6b77c16 AP |
72 | #ifdef MIPSEB |
73 | # define MSB 0 | |
74 | # define LSB 7 | |
75 | #else | |
76 | # define MSB 7 | |
77 | # define LSB 0 | |
78 | #endif | |
79 | ||
80 | .text | |
81 | .set noat | |
82 | .set noreorder | |
83 | ||
84 | .align 5 | |
85 | .globl poly1305_init | |
86 | .ent poly1305_init | |
87 | poly1305_init: | |
88 | .frame $sp,0,$ra | |
89 | .set reorder | |
90 | ||
91 | sd $zero,0($ctx) | |
92 | sd $zero,8($ctx) | |
93 | sd $zero,16($ctx) | |
94 | ||
95 | beqz $inp,.Lno_key | |
96 | ||
947716c1 AP |
97 | #if defined(_MIPS_ARCH_MIPS64R6) |
98 | ld $in0,0($inp) | |
99 | ld $in1,8($inp) | |
100 | #else | |
c6b77c16 AP |
101 | ldl $in0,0+MSB($inp) |
102 | ldl $in1,8+MSB($inp) | |
103 | ldr $in0,0+LSB($inp) | |
104 | ldr $in1,8+LSB($inp) | |
947716c1 | 105 | #endif |
c6b77c16 AP |
106 | #ifdef MIPSEB |
107 | # if defined(_MIPS_ARCH_MIPS64R2) | |
108 | dsbh $in0,$in0 # byte swap | |
109 | dsbh $in1,$in1 | |
110 | dshd $in0,$in0 | |
111 | dshd $in1,$in1 | |
112 | # else | |
113 | ori $tmp0,$zero,0xFF | |
114 | dsll $tmp2,$tmp0,32 | |
115 | or $tmp0,$tmp2 # 0x000000FF000000FF | |
116 | ||
117 | and $tmp1,$in0,$tmp0 # byte swap | |
118 | and $tmp3,$in1,$tmp0 | |
119 | dsrl $tmp2,$in0,24 | |
120 | dsrl $tmp4,$in1,24 | |
121 | dsll $tmp1,24 | |
122 | dsll $tmp3,24 | |
123 | and $tmp2,$tmp0 | |
124 | and $tmp4,$tmp0 | |
125 | dsll $tmp0,8 # 0x0000FF000000FF00 | |
126 | or $tmp1,$tmp2 | |
127 | or $tmp3,$tmp4 | |
128 | and $tmp2,$in0,$tmp0 | |
129 | and $tmp4,$in1,$tmp0 | |
130 | dsrl $in0,8 | |
131 | dsrl $in1,8 | |
132 | dsll $tmp2,8 | |
133 | dsll $tmp4,8 | |
134 | and $in0,$tmp0 | |
135 | and $in1,$tmp0 | |
136 | or $tmp1,$tmp2 | |
137 | or $tmp3,$tmp4 | |
138 | or $in0,$tmp1 | |
139 | or $in1,$tmp3 | |
140 | dsrl $tmp1,$in0,32 | |
141 | dsrl $tmp3,$in1,32 | |
142 | dsll $in0,32 | |
143 | dsll $in1,32 | |
144 | or $in0,$tmp1 | |
145 | or $in1,$tmp3 | |
146 | # endif | |
147 | #endif | |
148 | li $tmp0,1 | |
149 | dsll $tmp0,32 | |
150 | daddiu $tmp0,-63 | |
151 | dsll $tmp0,28 | |
152 | daddiu $tmp0,-1 # 0ffffffc0fffffff | |
153 | ||
154 | and $in0,$tmp0 | |
155 | daddiu $tmp0,-3 # 0ffffffc0ffffffc | |
156 | and $in1,$tmp0 | |
157 | ||
158 | sd $in0,24($ctx) | |
159 | dsrl $tmp0,$in1,2 | |
160 | sd $in1,32($ctx) | |
161 | daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) | |
162 | sd $tmp0,40($ctx) | |
163 | ||
164 | .Lno_key: | |
165 | li $v0,0 # return 0 | |
166 | jr $ra | |
167 | .end poly1305_init | |
168 | ___ | |
# poly1305_blocks(ctx, inp, len, padbit) and its worker
# poly1305_blocks_internal. The entry point shifts len right by 4 to get the
# number of complete 16-byte blocks and returns at once when that is zero.
# The worker loads the hash (ctx+0/8/16) and key (ctx+24/32/40), then for
# every block adds the input words and padbit to the hash, multiplies by the
# key and folds the bits above the low two of the top word back in, before
# storing the hash again.
#
# Lexical register aliases for this block: the hash lives in $h0-$h2
# ($s0-$s2), the key in $r0/$r1 ($s3/$s4), the stored r1 + (r1 >> 2) value in
# $rs1 ($s5), and $d0-$d2 collect the product. The key alias is deliberately
# NOT named $s1: a lexical of that name shadows the register variable $s1 for
# the rest of the block, which made the NUBI prologue/epilogue lines
# "sd/ld $s1,8($sp)" emit $s5's register and left the real $s1 (holding $h1)
# unsaved.
169 | { | |
170 | my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = | |
171 | ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); | |
172 | ||
173 | $code.=<<___; | |
174 | .align 5 | |
175 | .globl poly1305_blocks | |
176 | .ent poly1305_blocks | |
177 | poly1305_blocks: | |
178 | .set noreorder | |
179 | dsrl $len,4 # number of complete blocks | |
8640f210 | 180 | bnez $len,poly1305_blocks_internal |
c6b77c16 | 181 | nop |
8640f210 AP |
182 | jr $ra |
183 | nop | |
184 | .end poly1305_blocks | |
c6b77c16 | 185 | |
8640f210 AP |
186 | .align 5 |
187 | .ent poly1305_blocks_internal | |
188 | poly1305_blocks_internal: | |
189 | .frame $sp,6*8,$ra | |
c6b77c16 | 190 | .mask $SAVED_REGS_MASK,-8 |
8640f210 | 191 | .set noreorder |
947716c1 | 192 | dsubu $sp,6*8 |
8640f210 AP |
193 | sd $s5,40($sp) |
194 | sd $s4,32($sp) | |
c6b77c16 AP |
195 | ___ |
# $s0-$s3 are spilled only for NUBI flavours, matching the wider NUBI value
# of $SAVED_REGS_MASK; other flavours skip these four stores.
196 | $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue | |
8640f210 AP |
197 | sd $s3,24($sp) |
198 | sd $s2,16($sp) | |
199 | sd $s1,8($sp) | |
200 | sd $s0,0($sp) | |
c6b77c16 AP |
201 | ___ |
202 | $code.=<<___; | |
203 | .set reorder | |
204 | ||
205 | ld $h0,0($ctx) # load hash value | |
206 | ld $h1,8($ctx) | |
207 | ld $h2,16($ctx) | |
208 | ||
209 | ld $r0,24($ctx) # load key | |
210 | ld $r1,32($ctx) | |
211 | ld $rs1,40($ctx) | |
212 | ||
213 | .Loop: | |
947716c1 AP |
214 | #if defined(_MIPS_ARCH_MIPS64R6) |
215 | ld $in0,0($inp) # load input | |
216 | ld $in1,8($inp) | |
217 | #else | |
c6b77c16 AP |
218 | ldl $in0,0+MSB($inp) # load input |
219 | ldl $in1,8+MSB($inp) | |
220 | ldr $in0,0+LSB($inp) | |
c6b77c16 | 221 | ldr $in1,8+LSB($inp) |
947716c1 AP |
222 | #endif |
223 | daddiu $len,-1 | |
c6b77c16 AP |
224 | daddiu $inp,16 |
225 | #ifdef MIPSEB | |
226 | # if defined(_MIPS_ARCH_MIPS64R2) | |
227 | dsbh $in0,$in0 # byte swap | |
228 | dsbh $in1,$in1 | |
229 | dshd $in0,$in0 | |
230 | dshd $in1,$in1 | |
231 | # else | |
232 | ori $tmp0,$zero,0xFF | |
233 | dsll $tmp2,$tmp0,32 | |
234 | or $tmp0,$tmp2 # 0x000000FF000000FF | |
235 | ||
236 | and $tmp1,$in0,$tmp0 # byte swap | |
237 | and $tmp3,$in1,$tmp0 | |
238 | dsrl $tmp2,$in0,24 | |
239 | dsrl $tmp4,$in1,24 | |
240 | dsll $tmp1,24 | |
241 | dsll $tmp3,24 | |
242 | and $tmp2,$tmp0 | |
243 | and $tmp4,$tmp0 | |
244 | dsll $tmp0,8 # 0x0000FF000000FF00 | |
245 | or $tmp1,$tmp2 | |
246 | or $tmp3,$tmp4 | |
247 | and $tmp2,$in0,$tmp0 | |
248 | and $tmp4,$in1,$tmp0 | |
249 | dsrl $in0,8 | |
250 | dsrl $in1,8 | |
251 | dsll $tmp2,8 | |
252 | dsll $tmp4,8 | |
253 | and $in0,$tmp0 | |
254 | and $in1,$tmp0 | |
255 | or $tmp1,$tmp2 | |
256 | or $tmp3,$tmp4 | |
257 | or $in0,$tmp1 | |
258 | or $in1,$tmp3 | |
259 | dsrl $tmp1,$in0,32 | |
260 | dsrl $tmp3,$in1,32 | |
261 | dsll $in0,32 | |
262 | dsll $in1,32 | |
263 | or $in0,$tmp1 | |
264 | or $in1,$tmp3 | |
265 | # endif | |
266 | #endif | |
267 | daddu $h0,$in0 # accumulate input | |
268 | daddu $h1,$in1 | |
269 | sltu $tmp0,$h0,$in0 | |
270 | sltu $tmp1,$h1,$in1 | |
271 | daddu $h1,$tmp0 | |
272 | ||
947716c1 | 273 | dmultu ($r0,$h0) # h0*r0 |
c6b77c16 AP |
274 | daddu $h2,$padbit |
275 | sltu $tmp0,$h1,$tmp0 | |
947716c1 AP |
276 | mflo ($d0,$r0,$h0) |
277 | mfhi ($d1,$r0,$h0) | |
c6b77c16 | 278 | |
947716c1 | 279 | dmultu ($rs1,$h1) # h1*5*r1 |
c6b77c16 AP |
280 | daddu $tmp0,$tmp1 |
281 | daddu $h2,$tmp0 | |
947716c1 AP |
282 | mflo ($tmp0,$rs1,$h1) |
283 | mfhi ($tmp1,$rs1,$h1) | |
c6b77c16 | 284 | |
947716c1 | 285 | dmultu ($r1,$h0) # h0*r1 |
c6b77c16 AP |
286 | daddu $d0,$tmp0 |
287 | daddu $d1,$tmp1 | |
947716c1 AP |
288 | mflo ($tmp2,$r1,$h0) |
289 | mfhi ($d2,$r1,$h0) | |
c6b77c16 AP |
290 | sltu $tmp0,$d0,$tmp0 |
291 | daddu $d1,$tmp0 | |
292 | ||
947716c1 | 293 | dmultu ($r0,$h1) # h1*r0 |
c6b77c16 AP |
294 | daddu $d1,$tmp2 |
295 | sltu $tmp2,$d1,$tmp2 | |
947716c1 AP |
296 | mflo ($tmp0,$r0,$h1) |
297 | mfhi ($tmp1,$r0,$h1) | |
c6b77c16 AP |
298 | daddu $d2,$tmp2 |
299 | ||
947716c1 | 300 | dmultu ($rs1,$h2) # h2*5*r1 |
c6b77c16 AP |
301 | daddu $d1,$tmp0 |
302 | daddu $d2,$tmp1 | |
947716c1 | 303 | mflo ($tmp2,$rs1,$h2) |
c6b77c16 | 304 | |
947716c1 | 305 | dmultu ($r0,$h2) # h2*r0 |
c6b77c16 AP |
306 | sltu $tmp0,$d1,$tmp0 |
307 | daddu $d2,$tmp0 | |
947716c1 | 308 | mflo ($tmp3,$r0,$h2) |
c6b77c16 AP |
309 | |
310 | daddu $d1,$tmp2 | |
311 | daddu $d2,$tmp3 | |
312 | sltu $tmp2,$d1,$tmp2 | |
313 | daddu $d2,$tmp2 | |
314 | ||
315 | li $tmp0,-4 # final reduction | |
316 | and $tmp0,$d2 | |
317 | dsrl $tmp1,$d2,2 | |
318 | andi $h2,$d2,3 | |
319 | daddu $tmp0,$tmp1 | |
320 | daddu $h0,$d0,$tmp0 | |
321 | sltu $tmp0,$h0,$tmp0 | |
322 | daddu $h1,$d1,$tmp0 | |
323 | sltu $tmp0,$h1,$tmp0 | |
324 | daddu $h2,$h2,$tmp0 | |
325 | ||
326 | bnez $len,.Loop | |
327 | ||
328 | sd $h0,0($ctx) # store hash value | |
329 | sd $h1,8($ctx) | |
330 | sd $h2,16($ctx) | |
331 | ||
332 | .set noreorder | |
8640f210 AP |
333 | ld $s5,40($sp) # epilogue |
334 | ld $s4,32($sp) | |
c6b77c16 AP |
335 | ___ |
# Mirror of the NUBI-only prologue stores: reload $s0-$s3 from the frame.
336 | $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue | |
8640f210 AP |
337 | ld $s3,24($sp) |
338 | ld $s2,16($sp) | |
339 | ld $s1,8($sp) | |
340 | ld $s0,0($sp) | |
c6b77c16 AP |
341 | ___ |
342 | $code.=<<___; | |
c6b77c16 | 343 | jr $ra |
947716c1 | 344 | daddu $sp,6*8 |
8640f210 | 345 | .end poly1305_blocks_internal |
c6b77c16 AP |
346 | ___ |
347 | } | |
# poly1305_emit(ctx, mac, nonce). The emitted routine loads the three hash
# words from ctx+0/8/16 and computes hash+5 with carry propagation. The carry
# out of bit 1 of the top word is turned into an all-ones/all-zeros mask that
# selects, branch-free via and/or, between hash+5 and the original hash. It
# then adds the nonce (read as four 32-bit words at nonce+0..12 and combined
# into two 64-bit values) and stores the 16 result bytes to mac, least
# significant byte first. An identification string is placed in .rdata.
#
# $ctx is re-declared lexically here with the same register ($a0) as the
# file-level variable; $mac and $nonce name the second and third arguments.
348 | { | |
349 | my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); | |
350 | ||
351 | $code.=<<___; | |
352 | .align 5 | |
353 | .globl poly1305_emit | |
354 | .ent poly1305_emit | |
355 | poly1305_emit: | |
356 | .frame $sp,0,$ra | |
357 | .set reorder | |
358 | ||
359 | ld $tmp0,0($ctx) | |
360 | ld $tmp1,8($ctx) | |
361 | ld $tmp2,16($ctx) | |
362 | ||
363 | daddiu $in0,$tmp0,5 # compare to modulus | |
364 | sltiu $tmp3,$in0,5 | |
365 | daddu $in1,$tmp1,$tmp3 | |
366 | sltu $tmp3,$in1,$tmp3 | |
367 | daddu $tmp2,$tmp2,$tmp3 | |
368 | ||
369 | dsrl $tmp2,2 # see if it carried/borrowed | |
370 | dsubu $tmp2,$zero,$tmp2 | |
371 | nor $tmp3,$zero,$tmp2 | |
372 | ||
373 | and $in0,$tmp2 | |
374 | and $tmp0,$tmp3 | |
375 | and $in1,$tmp2 | |
376 | and $tmp1,$tmp3 | |
377 | or $in0,$tmp0 | |
378 | or $in1,$tmp1 | |
379 | ||
380 | lwu $tmp0,0($nonce) # load nonce | |
381 | lwu $tmp1,4($nonce) | |
382 | lwu $tmp2,8($nonce) | |
383 | lwu $tmp3,12($nonce) | |
384 | dsll $tmp1,32 | |
385 | dsll $tmp3,32 | |
386 | or $tmp0,$tmp1 | |
387 | or $tmp2,$tmp3 | |
388 | ||
389 | daddu $in0,$tmp0 # accumulate nonce | |
390 | daddu $in1,$tmp2 | |
391 | sltu $tmp0,$in0,$tmp0 | |
392 | daddu $in1,$tmp0 | |
393 | ||
394 | dsrl $tmp0,$in0,8 # write mac value | |
395 | dsrl $tmp1,$in0,16 | |
396 | dsrl $tmp2,$in0,24 | |
397 | sb $in0,0($mac) | |
398 | dsrl $tmp3,$in0,32 | |
399 | sb $tmp0,1($mac) | |
400 | dsrl $tmp0,$in0,40 | |
401 | sb $tmp1,2($mac) | |
402 | dsrl $tmp1,$in0,48 | |
403 | sb $tmp2,3($mac) | |
404 | dsrl $tmp2,$in0,56 | |
405 | sb $tmp3,4($mac) | |
406 | dsrl $tmp3,$in1,8 | |
407 | sb $tmp0,5($mac) | |
408 | dsrl $tmp0,$in1,16 | |
409 | sb $tmp1,6($mac) | |
410 | dsrl $tmp1,$in1,24 | |
411 | sb $tmp2,7($mac) | |
412 | ||
413 | sb $in1,8($mac) | |
414 | dsrl $tmp2,$in1,32 | |
415 | sb $tmp3,9($mac) | |
416 | dsrl $tmp3,$in1,40 | |
417 | sb $tmp0,10($mac) | |
418 | dsrl $tmp0,$in1,48 | |
419 | sb $tmp1,11($mac) | |
420 | dsrl $tmp1,$in1,56 | |
421 | sb $tmp2,12($mac) | |
422 | sb $tmp3,13($mac) | |
423 | sb $tmp0,14($mac) | |
424 | sb $tmp1,15($mac) | |
425 | ||
426 | jr $ra | |
427 | .end poly1305_emit | |
428 | .rdata | |
429 | .asciiz "Poly1305 for MIPS64, CRYPTOGAMS by <appro\@openssl.org>" | |
430 | .align 2 | |
431 | ___ | |
432 | } | |
433 | ||
# The last command-line argument, if any, names the output file; without one
# the generated assembly goes to the inherited STDOUT. The file is opened with
# three-argument open so the name is never parsed for a mode, and a failure to
# open or to close (i.e. flush) the output is fatal rather than silently
# leaving a missing or truncated file behind.
434 | $output=pop and (open STDOUT,">",$output or die "can't open $output: $!"); | |
435 | print $code; | |
436 | close STDOUT or die "error closing STDOUT: $!"; | |
437 |