]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/asm/ghashp8-ppc.pl
Add OpenSSL copyright to .pl files
[thirdparty/openssl.git] / crypto / modes / asm / ghashp8-ppc.pl
CommitLineData
6aa36e8e
RS
1#! /usr/bin/env perl
2# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
f5b798f5
AP
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# GHASH for PowerISA v2.07.
18#
19# July 2014
20#
21# Accurate performance measurements are problematic, because it's
22# always virtualized setup with possibly throttled processor.
23# Relative comparison is therefore more informative. This initial
24# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
25# faster than "4-bit" integer-only compiler-generated 64-bit code.
26# "Initial version" means that there is room for further improvement.
27
# Command-line arguments: the perlasm flavour string and the output file
# name. Both are passed on verbatim to ppc-xlate.pl further down; a flavour
# ending in "le" additionally selects the little-endian code lines.
$flavour = shift;
$output  = shift;

# A flavour that names neither a 64-bit nor a 32-bit target is rejected.
die "nonsense $flavour" unless $flavour =~ /64/ || $flavour =~ /32/;

# ABI-dependent parameters. "64" takes precedence when both substrings occur,
# exactly as in an if (/64/) ... elsif (/32/) chain.
$SIZE_T = ($flavour =~ /64/) ? 8 : 4;			# pointer/word size in bytes
$LRSAVE = ($SIZE_T == 8) ? 2*$SIZE_T : $SIZE_T;		# 16 for 64-bit, 4 for 32-bit
($STU, $POP, $PUSH) = ($SIZE_T == 8)
	? ("stdu", "ld",  "std")
	: ("stwu", "lwz", "stw");
44
# Locate the ppc-xlate.pl translator relative to this script: first in the
# script's own directory, then in ../../perlasm/ below it.
# $dir is the directory part of $0 including the trailing separator, or the
# empty string when $0 contains no '/' or '\' (taken from the match result
# directly rather than from a possibly stale $1).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl,
# run under the same perl interpreter ($^X).
# Note the low-precedence "or": with "||" the die would bind to the command
# string (always true) and a failed open would go unnoticed.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
51
# Integer registers r3..r6, used in the code template as the argument block.
my ($Xip, $Htbl, $inp, $len) = qw(r3 r4 r5 r6);

# Vector registers v0..v3: product/accumulator vectors and the input block.
my ($Xl, $Xm, $Xh, $IN) = map { "v$_" } 0 .. 3;

# Vector registers v4..v12: zero, temporaries, the 0xc2 constant, the H
# vectors and the little-endian permute mask.
my ($zero, $t0, $t1, $t2, $xC2, $H, $Hh, $Hl, $lemask) = map { "v$_" } 4 .. 12;

# Integer register that holds the caller's value of SPR 256 while a routine
# runs (read with mfspr on entry, written back with mtspr before blr).
my $vrsave = "r12";
57
# $code is the assembly template for the three exported routines. It is an
# interpolating here-document: the Perl variables ($Xip, $Htbl, $H, $vrsave,
# ...) are replaced by the register names assigned to them above, and "\@"
# keeps the e-mail address in the .asciz string from being interpolated.
# Lines that start with "le?" are little-endian-only; the loop after the
# here-document either strips that marker or turns it into a "#le#" comment,
# depending on the flavour.
#
# Routines, as far as the template itself shows:
#   .gcm_init_p8   loads H from the address in r4, derives the "twisted" H
#                  and stores four 16-byte vectors at r3+0x00 (0xc2 constant),
#                  r3+0x10 (Hl), r3+0x20 (H) and r3+0x30 (Hh).
#   .gcm_gmult_p8  loads Xi from $Xip and that table from $Htbl, performs one
#                  multiplication with two reduction phases, and writes the
#                  result back to $Xip.
#   .gcm_ghash_p8  same setup, then consumes 16-byte blocks from $inp, with
#                  $len decremented by 16 per block, folding each block into
#                  Xi, and finally writes Xi back to $Xip.
# NOTE(review): $len is presumably expected to be a non-zero multiple of 16
# (the first block is loaded unconditionally) -- confirm against the callers.
# Each routine saves SPR 256 in $vrsave, sets it from r0 and restores it
# before returning.
58$code=<<___;
59.machine "any"
60
61.text
62
63.globl .gcm_init_p8
64.align 5
65.gcm_init_p8:
66 lis r0,0xfff0
67 li r8,0x10
68 mfspr $vrsave,256
69 li r9,0x20
70 mtspr 256,r0
71 li r10,0x30
72 lvx_u $H,0,r4 # load H
73
74 vspltisb $xC2,-16 # 0xf0
75 vspltisb $t0,1 # one
76 vaddubm $xC2,$xC2,$xC2 # 0xe0
77 vxor $zero,$zero,$zero
78 vor $xC2,$xC2,$t0 # 0xe1
79 vsldoi $xC2,$xC2,$zero,15 # 0xe1...
80 vsldoi $t1,$zero,$t0,1 # ...1
81 vaddubm $xC2,$xC2,$xC2 # 0xc2...
82 vspltisb $t2,7
83 vor $xC2,$xC2,$t1 # 0xc2....01
84 vspltb $t1,$H,0 # most significant byte
85 vsl $H,$H,$t0 # H<<=1
86 vsrab $t1,$t1,$t2 # broadcast carry bit
87 vand $t1,$t1,$xC2
88 vxor $H,$H,$t1 # twisted H
89
90 vsldoi $H,$H,$H,8 # twist even more ...
91 vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
92 vsldoi $Hl,$zero,$H,8 # ... and split
93 vsldoi $Hh,$H,$zero,8
94
95 stvx_u $xC2,0,r3 # save pre-computed table
96 stvx_u $Hl,r8,r3
97 stvx_u $H, r9,r3
98 stvx_u $Hh,r10,r3
99
100 mtspr 256,$vrsave
101 blr
102 .long 0
103 .byte 0,12,0x14,0,0,0,2,0
104 .long 0
105.size .gcm_init_p8,.-.gcm_init_p8
106
107.globl .gcm_gmult_p8
108.align 5
109.gcm_gmult_p8:
110 lis r0,0xfff8
111 li r8,0x10
112 mfspr $vrsave,256
113 li r9,0x20
114 mtspr 256,r0
115 li r10,0x30
116 lvx_u $IN,0,$Xip # load Xi
117
118 lvx_u $Hl,r8,$Htbl # load pre-computed table
119 le?lvsl $lemask,r0,r0
120 lvx_u $H, r9,$Htbl
121 le?vspltisb $t0,0x07
122 lvx_u $Hh,r10,$Htbl
123 le?vxor $lemask,$lemask,$t0
124 lvx_u $xC2,0,$Htbl
125 le?vperm $IN,$IN,$IN,$lemask
126 vxor $zero,$zero,$zero
127
053fa39a
RL
128 vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
129 vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
130 vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
f5b798f5
AP
131
132 vpmsumd $t2,$Xl,$xC2 # 1st phase
133
134 vsldoi $t0,$Xm,$zero,8
135 vsldoi $t1,$zero,$Xm,8
136 vxor $Xl,$Xl,$t0
137 vxor $Xh,$Xh,$t1
138
139 vsldoi $Xl,$Xl,$Xl,8
140 vxor $Xl,$Xl,$t2
141
142 vsldoi $t1,$Xl,$Xl,8 # 2nd phase
143 vpmsumd $Xl,$Xl,$xC2
144 vxor $t1,$t1,$Xh
145 vxor $Xl,$Xl,$t1
146
147 le?vperm $Xl,$Xl,$Xl,$lemask
148 stvx_u $Xl,0,$Xip # write out Xi
149
150 mtspr 256,$vrsave
151 blr
152 .long 0
153 .byte 0,12,0x14,0,0,0,2,0
154 .long 0
155.size .gcm_gmult_p8,.-.gcm_gmult_p8
156
157.globl .gcm_ghash_p8
158.align 5
159.gcm_ghash_p8:
160 lis r0,0xfff8
161 li r8,0x10
162 mfspr $vrsave,256
163 li r9,0x20
164 mtspr 256,r0
165 li r10,0x30
166 lvx_u $Xl,0,$Xip # load Xi
167
168 lvx_u $Hl,r8,$Htbl # load pre-computed table
169 le?lvsl $lemask,r0,r0
170 lvx_u $H, r9,$Htbl
171 le?vspltisb $t0,0x07
172 lvx_u $Hh,r10,$Htbl
173 le?vxor $lemask,$lemask,$t0
174 lvx_u $xC2,0,$Htbl
175 le?vperm $Xl,$Xl,$Xl,$lemask
176 vxor $zero,$zero,$zero
177
178 lvx_u $IN,0,$inp
179 addi $inp,$inp,16
180 subi $len,$len,16
181 le?vperm $IN,$IN,$IN,$lemask
182 vxor $IN,$IN,$Xl
183 b Loop
184
185.align 5
186Loop:
187 subic $len,$len,16
053fa39a 188 vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
f5b798f5 189 subfe. r0,r0,r0 # borrow?-1:0
053fa39a 190 vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
f5b798f5 191 and r0,r0,$len
053fa39a 192 vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
f5b798f5
AP
193 add $inp,$inp,r0
194
195 vpmsumd $t2,$Xl,$xC2 # 1st phase
196
197 vsldoi $t0,$Xm,$zero,8
198 vsldoi $t1,$zero,$Xm,8
199 vxor $Xl,$Xl,$t0
200 vxor $Xh,$Xh,$t1
201
202 vsldoi $Xl,$Xl,$Xl,8
203 vxor $Xl,$Xl,$t2
204 lvx_u $IN,0,$inp
205 addi $inp,$inp,16
206
207 vsldoi $t1,$Xl,$Xl,8 # 2nd phase
208 vpmsumd $Xl,$Xl,$xC2
209 le?vperm $IN,$IN,$IN,$lemask
210 vxor $t1,$t1,$Xh
211 vxor $IN,$IN,$t1
212 vxor $IN,$IN,$Xl
213 beq Loop # did $len-=16 borrow?
214
215 vxor $Xl,$Xl,$t1
216 le?vperm $Xl,$Xl,$Xl,$lemask
217 stvx_u $Xl,0,$Xip # write out Xi
218
219 mtspr 256,$vrsave
220 blr
221 .long 0
222 .byte 0,12,0x14,0,0,0,4,0
223 .long 0
224.size .gcm_ghash_p8,.-.gcm_ghash_p8
225
226.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
227.align 2
228___
229
# Resolve the endian markers in the template and print it, line by line, to
# STDOUT (the pipe into ppc-xlate.pl):
#   little-endian flavour (name ends in "le"): the first "le?" marker on a
#       line is stripped; a line without one has its first "be?" turned into
#       the comment "#be#";
#   any other flavour: the first "le?" becomes "#le#"; a line without one
#       has its first "be?" stripped.
my $little_endian = ($flavour =~ /le$/);

foreach my $line (split("\n",$code)) {
	if ($little_endian) {
		$line =~ s/le\?// or
		$line =~ s/be\?/#be#/;
	} else {
		$line =~ s/le\?/#le#/ or
		$line =~ s/be\?//;
	}
	print $line,"\n";
}

# Closing the pipe flushes the output and waits for ppc-xlate.pl. Check the
# result so that a write error or a failing translator stops the build
# instead of leaving a truncated or empty output file behind. For a piped
# handle $! is 0 when the only problem was a non-zero child exit status,
# hence $? is reported as well.
close STDOUT or die "error closing STDOUT: $! (child status $?)";