]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/ppccpuid.pl
ppccpuid.pl: branch hints in OPENSSL_cleanse impact small block performance
[thirdparty/openssl.git] / crypto / ppccpuid.pl
1 #!/usr/bin/env perl
2
# Usage: ppccpuid.pl <flavour> [<output>]
#
# <flavour> is placed on the ppc-xlate.pl command line as-is; when it
# contains "64" the 64-bit mnemonics are selected further down.  The next
# command-line argument, if any, is appended to that command line too.
$flavour = shift;

# Directory part of $0, trailing separator included.  Fall back to the
# empty string when $0 has no directory component instead of reading $1
# after a failed match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then under perlasm/.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything printed to STDOUT is piped into the translator.
# Low-precedence "or" is required: with "||" the die would be attached to
# the (always true) command string and a failed open would go unreported.
open(STDOUT, "| $^X $xlate $flavour " . shift)
    or die "can't call $xlate: $!";

# Mnemonics that differ between the 32- and 64-bit flavours; they are
# interpolated into the assembly text generated below.
if ($flavour =~ /64/) {
    $CMPLI = "cmpldi";  # unsigned compare with immediate
    $SHRLI = "srdi";    # logical shift right by immediate
    $SIGNX = "extsw";   # sign-extend 32-bit result to 64 bits
} else {
    $CMPLI = "cmplwi";
    $SHRLI = "srwi";
    $SIGNX = "mr";      # plain register move on 32-bit
}
21
# The assembly text is accumulated in $code, one heredoc per routine.
# $CMPLI, $SHRLI and $SIGNX are interpolated; "\$" yields a literal "$"
# (the assembler's current-location symbol in "bdnz \$-8").

# Assembler preamble.
$code = <<___;
.machine "any"
.text

___

# OPENSSL_ppc64_probe: executes fcfid and extrdi and returns.
# NOTE(review): presumably run under a SIGILL handler to detect whether
# these instructions are available -- confirm against the C caller.
$code .= <<___;
.globl .OPENSSL_ppc64_probe
.align 4
.OPENSSL_ppc64_probe:
fcfid f1,f1
extrdi r0,r0,32,0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0

___

# OPENSSL_altivec_probe: executes one hand-encoded "vor v0,v0,v0" and
# returns.  NOTE(review): presumably a SIGILL-based AltiVec check, as above.
$code .= <<___;
.globl .OPENSSL_altivec_probe
.align 4
.OPENSSL_altivec_probe:
.long 0x10000484 # vor v0,v0,v0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0

___

# OPENSSL_wipe_cpu: zeroes r0 and r4-r12 with xor, copies f31 into f0-f13,
# and returns the value of r1 in r3.
$code .= <<___;
.globl .OPENSSL_wipe_cpu
.align 4
.OPENSSL_wipe_cpu:
xor r0,r0,r0
fmr f0,f31
fmr f1,f31
fmr f2,f31
mr r3,r1
fmr f3,f31
xor r4,r4,r4
fmr f4,f31
xor r5,r5,r5
fmr f5,f31
xor r6,r6,r6
fmr f6,f31
xor r7,r7,r7
fmr f7,f31
xor r8,r8,r8
fmr f8,f31
xor r9,r9,r9
fmr f9,f31
xor r10,r10,r10
fmr f10,f31
xor r11,r11,r11
fmr f11,f31
xor r12,r12,r12
fmr f12,f31
fmr f13,f31
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0

___

# OPENSSL_atomic_add: lwarx/stwcx. retry loop that adds r4 to the word at
# the address in r3; the sum is returned in r3 via $SIGNX.
$code .= <<___;
.globl .OPENSSL_atomic_add
.align 4
.OPENSSL_atomic_add:
Ladd: lwarx r5,0,r3
add r0,r4,r5
stwcx. r0,0,r3
bne- Ladd
$SIGNX r3,r0
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0

___

# OPENSSL_rdtsc: reads the time base with mftb into r3 and mftbu into r4.
$code .= <<___;
.globl .OPENSSL_rdtsc
.align 4
.OPENSSL_rdtsc:
mftb r3
mftbu r4
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0

___

# OPENSSL_cleanse: stores zeros starting at the address in r3 for the
# byte count in r4.  Counts below 7 use the byte loop at "Little" (a zero
# count returns immediately).  Larger counts store single bytes until r3
# is 4-byte aligned, then store r4>>2 words, and finish any remaining
# r4&3 bytes through "Little".
$code .= <<___;
.globl .OPENSSL_cleanse
.align 4
.OPENSSL_cleanse:
$CMPLI r4,7
li r0,0
bge Lot
$CMPLI r4,0
beqlr-
Little: mtctr r4
stb r0,0(r3)
addi r3,r3,1
bdnz \$-8
blr
Lot: andi. r5,r3,3
beq Laligned
stb r0,0(r3)
subi r4,r4,1
addi r3,r3,1
b Lot
Laligned:
$SHRLI r5,r4,2
mtctr r5
stw r0,0(r3)
addi r3,r3,4
bdnz \$-8
andi. r4,r4,3
bne Little
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
___
{
    # Register aliases used by both routines below; each name is
    # interpolated into the assembly text.
    # NOTE(review): r3/r4/r5 are presumably the incoming integer
    # arguments -- confirm against the C prototypes.
    my $buf       = "r3";   # address of the word being updated
    my $count     = "r4";   # number of entries
    my $limit     = "r5";   # iteration budget, OPENSSL_instrument_bus2 only
    my $now       = "r6";   # latest time-base reading / scratch
    my $prev      = "r7";   # previous time-base reading
    my $delta     = "r8";   # $now - $prev
    my $prevdelta = "r9";   # previous $delta, OPENSSL_instrument_bus2 only

    # OPENSSL_instrument_bus: after one priming update with a zero delta,
    # loops $count times.  Each pass reads the time base, computes the
    # delta since the previous reading, flushes the cache line at $buf,
    # adds the delta to the word there (lwarx/stwcx./stwx) and advances
    # $buf by 4.  Returns $count in r3.
    $code .= <<___;
.globl .OPENSSL_instrument_bus
.align 4
.OPENSSL_instrument_bus:
mtctr $count

mftb $prev # collect 1st tick
li $delta,0

dcbf 0,$buf # flush cache line
lwarx $now,0,$buf # load and lock
add $now,$now,$delta
stwcx. $now,0,$buf
stwx $now,0,$buf

Loop: mftb $now
sub $delta,$now,$prev
mr $prev,$now
dcbf 0,$buf # flush cache line
lwarx $now,0,$buf # load and lock
add $now,$now,$delta
stwcx. $now,0,$buf
stwx $now,0,$buf
addi $buf,$buf,4 # ++$buf
bdnz Loop

mr r3,$count
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0

___

    # OPENSSL_instrument_bus2: same update sequence, but $buf advances
    # (and the remaining count, kept scaled by 4, shrinks) only when the
    # new delta differs from the previous one: the inverted eq bit of cr7
    # is turned into a step of 4 or 0.  The loop also stops once $limit
    # has been decremented to zero.  Returns the original count minus the
    # remaining count in r3.
    $code .= <<___;
.globl .OPENSSL_instrument_bus2
.align 4
.OPENSSL_instrument_bus2:
mr r0,$count
slwi $count,$count,2

mftb $prev # collect 1st tick
li $delta,0

dcbf 0,$buf # flush cache line
lwarx $now,0,$buf # load and lock
add $now,$now,$delta
stwcx. $now,0,$buf
stwx $now,0,$buf

mftb $now # collect 1st diff
sub $delta,$now,$prev
mr $prev,$now
mr $prevdelta,$delta
Loop2:
dcbf 0,$buf # flush cache line
lwarx $now,0,$buf # load and lock
add $now,$now,$delta
stwcx. $now,0,$buf
stwx $now,0,$buf

addic. $limit,$limit,-1
beq Ldone2

mftb $now
sub $delta,$now,$prev
mr $prev,$now
cmplw 7,$delta,$prevdelta
mr $prevdelta,$delta

mfcr $now # pull cr
not $now,$now # flip bits
rlwinm $now,$now,1,29,29 # isolate flipped eq bit and scale

sub. $count,$count,$now # conditional --$count
add $buf,$buf,$now # conditional ++$buf
bne Loop2

Ldone2:
srwi $count,$count,2
sub r3,r0,$count
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
___
}
218
# Replace every `...` span in the generated text with the result of
# evaluating its contents as Perl.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# STDOUT is the pipe to ppc-xlate.pl opened earlier in this script.
print $code;

# Closing a piped handle waits for the child and returns false if the
# final flush failed or the translator exited non-zero ($! is 0 in the
# latter case and the status is in $?).  Fail loudly so that a broken
# translation cannot pass for a successful run.
close STDOUT
    or die $! ? "error closing STDOUT: $!"
              : "ppc-xlate.pl exited with status $?";