git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/ppccpuid.pl
Improve chacha20 performance on aarch64 by interleaving scalar with SVE/SVE2
[thirdparty/openssl.git] / crypto / ppccpuid.pl
CommitLineData
e0a65194 1#! /usr/bin/env perl
fecb3aae 2# Copyright 2007-2022 The OpenSSL Project Authors. All Rights Reserved.
e0a65194 3#
0e9725bc 4# Licensed under the Apache License 2.0 (the "License"). You may not use
e0a65194
RS
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9c9c83cc 9
1aa89a7a
RL
10# $output is the last argument if it looks like a file (it has an extension)
11# $flavour is the first argument if it doesn't look like a file
12$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
13$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
9c9c83cc
AP
14
15$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
16( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
17( $xlate="${dir}perlasm/ppc-xlate.pl" and -f $xlate) or
18die "can't locate ppc-xlate.pl";
19
1aa89a7a
RL
20open STDOUT,"| $^X $xlate $flavour \"$output\""
21 or die "can't call $xlate: $!";
9c9c83cc 22
addd641f 23if ($flavour=~/64/) {
9c9c83cc
AP
24 $CMPLI="cmpldi";
25 $SHRLI="srdi";
26 $SIGNX="extsw";
27} else {
28 $CMPLI="cmplwi";
29 $SHRLI="srwi";
30 $SIGNX="mr";
31}
32
33$code=<<___;
492279f6 34.machine "any"
9c9c83cc
AP
35.text
36
81eae077
AP
37.globl .OPENSSL_fpu_probe
38.align 4
39.OPENSSL_fpu_probe:
40 fmr f0,f0
41 blr
42 .long 0
43 .byte 0,12,0x14,0,0,0,0,0
44.size .OPENSSL_fpu_probe,.-.OPENSSL_fpu_probe
b4b48a10 45.globl .OPENSSL_ppc64_probe
9c9c83cc 46.align 4
b4b48a10
AP
47.OPENSSL_ppc64_probe:
48 fcfid f1,f1
49 extrdi r0,r0,32,0
9c9c83cc 50 blr
67150340
AP
51 .long 0
52 .byte 0,12,0x14,0,0,0,0,0
d6019e16 53.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe
9c9c83cc 54
6415dd7b
AP
55.globl .OPENSSL_altivec_probe
56.align 4
57.OPENSSL_altivec_probe:
58 .long 0x10000484 # vor v0,v0,v0
59 blr
67150340
AP
60 .long 0
61 .byte 0,12,0x14,0,0,0,0,0
d6019e16 62.size .OPENSSL_altivec_probe,.-.OPENSSL_altivec_probe
6415dd7b 63
de51e830
AP
64.globl .OPENSSL_crypto207_probe
65.align 4
d86689e1 66.OPENSSL_crypto207_probe:
de51e830
AP
67 lvx_u v0,0,r1
68 vcipher v0,v0,v0
69 blr
70 .long 0
71 .byte 0,12,0x14,0,0,0,0,0
72.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe
73
e0e53282
AP
74.globl .OPENSSL_madd300_probe
75.align 4
76.OPENSSL_madd300_probe:
77 xor r0,r0,r0
78 maddld r3,r0,r0,r0
79 maddhdu r3,r0,r0,r0
80 blr
81 .long 0
82 .byte 0,12,0x14,0,0,0,0,0
83
f596bbe4
DB
84.globl .OPENSSL_brd31_probe
85.align 4
86.OPENSSL_brd31_probe:
87 xor r0,r0,r0
88 brd r3,r0
89 blr
90 .long 0
91 .byte 0,12,0x14,0,0,0,0,0
92.size .OPENSSL_brd31_probe,.-.OPENSSL_brd31_probe
93
94
9c9c83cc
AP
95.globl .OPENSSL_wipe_cpu
96.align 4
97.OPENSSL_wipe_cpu:
98 xor r0,r0,r0
78a533cb
AP
99 fmr f0,f31
100 fmr f1,f31
101 fmr f2,f31
9c9c83cc 102 mr r3,r1
78a533cb 103 fmr f3,f31
9c9c83cc 104 xor r4,r4,r4
78a533cb 105 fmr f4,f31
9c9c83cc 106 xor r5,r5,r5
78a533cb 107 fmr f5,f31
9c9c83cc 108 xor r6,r6,r6
78a533cb 109 fmr f6,f31
9c9c83cc 110 xor r7,r7,r7
78a533cb 111 fmr f7,f31
9c9c83cc 112 xor r8,r8,r8
78a533cb 113 fmr f8,f31
9c9c83cc 114 xor r9,r9,r9
78a533cb 115 fmr f9,f31
9c9c83cc 116 xor r10,r10,r10
78a533cb 117 fmr f10,f31
9c9c83cc 118 xor r11,r11,r11
78a533cb 119 fmr f11,f31
9c9c83cc 120 xor r12,r12,r12
78a533cb
AP
121 fmr f12,f31
122 fmr f13,f31
9c9c83cc 123 blr
67150340
AP
124 .long 0
125 .byte 0,12,0x14,0,0,0,0,0
d6019e16 126.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu
9c9c83cc
AP
127
128.globl .OPENSSL_atomic_add
129.align 4
130.OPENSSL_atomic_add:
5fabb88a 131Ladd: lwarx r5,0,r3
9c9c83cc
AP
132 add r0,r4,r5
133 stwcx. r0,0,r3
5fabb88a 134 bne- Ladd
9c9c83cc
AP
135 $SIGNX r3,r0
136 blr
67150340
AP
137 .long 0
138 .byte 0,12,0x14,0,0,0,2,0
139 .long 0
d6019e16 140.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add
9c9c83cc 141
c8f37048 142.globl .OPENSSL_rdtsc_mftb
9c9c83cc 143.align 4
c8f37048 144.OPENSSL_rdtsc_mftb:
9c9c83cc 145 mftb r3
9c9c83cc 146 blr
67150340
AP
147 .long 0
148 .byte 0,12,0x14,0,0,0,0,0
c8f37048
BE
149.size .OPENSSL_rdtsc_mftb,.-.OPENSSL_rdtsc_mftb
150
151.globl .OPENSSL_rdtsc_mfspr268
152.align 4
153.OPENSSL_rdtsc_mfspr268:
154 mfspr r3,268
155 blr
156 .long 0
157 .byte 0,12,0x14,0,0,0,0,0
158.size .OPENSSL_rdtsc_mfspr268,.-.OPENSSL_rdtsc_mfspr268
9c9c83cc
AP
159
160.globl .OPENSSL_cleanse
161.align 4
162.OPENSSL_cleanse:
163 $CMPLI r4,7
164 li r0,0
165 bge Lot
7676eebf
AP
166 $CMPLI r4,0
167 beqlr-
9c9c83cc
AP
168Little: mtctr r4
169 stb r0,0(r3)
170 addi r3,r3,1
9474483a 171 bdnz \$-8
9c9c83cc
AP
172 blr
173Lot: andi. r5,r3,3
174 beq Laligned
175 stb r0,0(r3)
176 subi r4,r4,1
177 addi r3,r3,1
178 b Lot
179Laligned:
180 $SHRLI r5,r4,2
181 mtctr r5
182 stw r0,0(r3)
183 addi r3,r3,4
9474483a 184 bdnz \$-8
9c9c83cc
AP
185 andi. r4,r4,3
186 bne Little
187 blr
67150340
AP
188 .long 0
189 .byte 0,12,0x14,0,0,0,2,0
190 .long 0
d6019e16 191.size .OPENSSL_cleanse,.-.OPENSSL_cleanse
e33826f0
AP
192
193.globl .CRYPTO_memcmp
194.align 4
195.CRYPTO_memcmp:
196 $CMPLI r5,0
197 li r0,0
198 beq Lno_data
199 mtctr r5
200Loop_cmp:
201 lbz r6,0(r3)
202 addi r3,r3,1
203 lbz r7,0(r4)
204 addi r4,r4,1
205 xor r6,r6,r7
206 or r0,r0,r6
207 bdnz Loop_cmp
208
209Lno_data:
210 li r3,0
211 sub r3,r3,r0
212 extrwi r3,r3,1,0
213 blr
214 .long 0
215 .byte 0,12,0x14,0,0,0,3,0
216 .long 0
217.size .CRYPTO_memcmp,.-.CRYPTO_memcmp
9c9c83cc 218___
5fabb88a
AP
219{
220my ($out,$cnt,$max)=("r3","r4","r5");
221my ($tick,$lasttick)=("r6","r7");
222my ($diff,$lastdiff)=("r8","r9");
223
224$code.=<<___;
c8f37048 225.globl .OPENSSL_instrument_bus_mftb
5fabb88a 226.align 4
c8f37048 227.OPENSSL_instrument_bus_mftb:
5fabb88a
AP
228 mtctr $cnt
229
230 mftb $lasttick # collect 1st tick
231 li $diff,0
232
233 dcbf 0,$out # flush cache line
234 lwarx $tick,0,$out # load and lock
235 add $tick,$tick,$diff
236 stwcx. $tick,0,$out
237 stwx $tick,0,$out
238
239Loop: mftb $tick
240 sub $diff,$tick,$lasttick
241 mr $lasttick,$tick
242 dcbf 0,$out # flush cache line
243 lwarx $tick,0,$out # load and lock
244 add $tick,$tick,$diff
245 stwcx. $tick,0,$out
246 stwx $tick,0,$out
247 addi $out,$out,4 # ++$out
248 bdnz Loop
249
250 mr r3,$cnt
251 blr
67150340
AP
252 .long 0
253 .byte 0,12,0x14,0,0,0,2,0
254 .long 0
c8f37048 255.size .OPENSSL_instrument_bus_mftb,.-.OPENSSL_instrument_bus_mftb
5fabb88a 256
c8f37048 257.globl .OPENSSL_instrument_bus2_mftb
5fabb88a 258.align 4
c8f37048 259.OPENSSL_instrument_bus2_mftb:
5fabb88a
AP
260 mr r0,$cnt
261 slwi $cnt,$cnt,2
262
263 mftb $lasttick # collect 1st tick
264 li $diff,0
265
266 dcbf 0,$out # flush cache line
267 lwarx $tick,0,$out # load and lock
268 add $tick,$tick,$diff
269 stwcx. $tick,0,$out
270 stwx $tick,0,$out
271
272 mftb $tick # collect 1st diff
273 sub $diff,$tick,$lasttick
274 mr $lasttick,$tick
275 mr $lastdiff,$diff
276Loop2:
277 dcbf 0,$out # flush cache line
278 lwarx $tick,0,$out # load and lock
279 add $tick,$tick,$diff
280 stwcx. $tick,0,$out
281 stwx $tick,0,$out
282
283 addic. $max,$max,-1
284 beq Ldone2
285
286 mftb $tick
287 sub $diff,$tick,$lasttick
288 mr $lasttick,$tick
289 cmplw 7,$diff,$lastdiff
290 mr $lastdiff,$diff
291
292 mfcr $tick # pull cr
293 not $tick,$tick # flip bits
294 rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
295
296 sub. $cnt,$cnt,$tick # conditional --$cnt
297 add $out,$out,$tick # conditional ++$out
298 bne Loop2
299
300Ldone2:
301 srwi $cnt,$cnt,2
302 sub r3,r0,$cnt
303 blr
67150340
AP
304 .long 0
305 .byte 0,12,0x14,0,0,0,3,0
306 .long 0
c8f37048
BE
307.size .OPENSSL_instrument_bus2_mftb,.-.OPENSSL_instrument_bus2_mftb
308
309.globl .OPENSSL_instrument_bus_mfspr268
310.align 4
311.OPENSSL_instrument_bus_mfspr268:
312 mtctr $cnt
313
314 mfspr $lasttick,268 # collect 1st tick
315 li $diff,0
316
317 dcbf 0,$out # flush cache line
318 lwarx $tick,0,$out # load and lock
319 add $tick,$tick,$diff
320 stwcx. $tick,0,$out
321 stwx $tick,0,$out
322
323Loop3: mfspr $tick,268
324 sub $diff,$tick,$lasttick
325 mr $lasttick,$tick
326 dcbf 0,$out # flush cache line
327 lwarx $tick,0,$out # load and lock
328 add $tick,$tick,$diff
329 stwcx. $tick,0,$out
330 stwx $tick,0,$out
331 addi $out,$out,4 # ++$out
332 bdnz Loop3
333
334 mr r3,$cnt
335 blr
336 .long 0
337 .byte 0,12,0x14,0,0,0,2,0
338 .long 0
339.size .OPENSSL_instrument_bus_mfspr268,.-.OPENSSL_instrument_bus_mfspr268
340
341.globl .OPENSSL_instrument_bus2_mfspr268
342.align 4
343.OPENSSL_instrument_bus2_mfspr268:
344 mr r0,$cnt
345 slwi $cnt,$cnt,2
346
347 mfspr $lasttick,268 # collect 1st tick
348 li $diff,0
349
350 dcbf 0,$out # flush cache line
351 lwarx $tick,0,$out # load and lock
352 add $tick,$tick,$diff
353 stwcx. $tick,0,$out
354 stwx $tick,0,$out
355
356 mfspr $tick,268 # collect 1st diff
357 sub $diff,$tick,$lasttick
358 mr $lasttick,$tick
359 mr $lastdiff,$diff
360Loop4:
361 dcbf 0,$out # flush cache line
362 lwarx $tick,0,$out # load and lock
363 add $tick,$tick,$diff
364 stwcx. $tick,0,$out
365 stwx $tick,0,$out
366
367 addic. $max,$max,-1
368 beq Ldone4
369
370 mfspr $tick,268
371 sub $diff,$tick,$lasttick
372 mr $lasttick,$tick
373 cmplw 7,$diff,$lastdiff
374 mr $lastdiff,$diff
375
376 mfcr $tick # pull cr
377 not $tick,$tick # flip bits
378 rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
379
380 sub. $cnt,$cnt,$tick # conditional --$cnt
381 add $out,$out,$tick # conditional ++$out
382 bne Loop4
383
384Ldone4:
385 srwi $cnt,$cnt,2
386 sub r3,r0,$cnt
387 blr
388 .long 0
389 .byte 0,12,0x14,0,0,0,3,0
390 .long 0
391.size .OPENSSL_instrument_bus2_mfspr268,.-.OPENSSL_instrument_bus2_mfspr268
5fabb88a
AP
392___
393}
9c9c83cc
AP
394
395$code =~ s/\`([^\`]*)\`/eval $1/gem;
396print $code;
a21314db 397close STDOUT or die "error closing STDOUT: $!";