2 # Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 #========================================================================
11 # Written by Xiaokang Qian <xiaokang.qian@arm.com> for the OpenSSL project,
12 # derived from https://github.com/ARM-software/AArch64cryptolib, original
13 # author Samuel Lee <Samuel.Lee@arm.com>. The module is, however, dual
14 # licensed under OpenSSL and SPDX BSD-3-Clause licenses depending on where you
16 #========================================================================
18 # Approach - We want to reload constants as we have plenty of spare ASIMD slots around crypto units for loading
19 # Unroll x8 in main loop, main loop to act on 8 16B blocks per iteration, and then do modulo of the accumulated
# intermediate hashes from the 8 blocks.
22 # ____________________________________________________
25 # |____________________________________________________|
27 # | CTR block 8k+13| AES block 8k+8 | GHASH block 8k+0 |
28 # |________________|________________|__________________|
30 # | CTR block 8k+14| AES block 8k+9 | GHASH block 8k+1 |
31 # |________________|________________|__________________|
33 # | CTR block 8k+15| AES block 8k+10| GHASH block 8k+2 |
34 # |________________|________________|__________________|
36 # | CTR block 8k+16| AES block 8k+11| GHASH block 8k+3 |
37 # |________________|________________|__________________|
39 # | CTR block 8k+17| AES block 8k+12| GHASH block 8k+4 |
40 # |________________|________________|__________________|
42 # | CTR block 8k+18| AES block 8k+13| GHASH block 8k+5 |
43 # |________________|________________|__________________|
45 # | CTR block 8k+19| AES block 8k+14| GHASH block 8k+6 |
46 # |________________|________________|__________________|
48 # | CTR block 8k+20| AES block 8k+15| GHASH block 8k+7 |
49 # |________________|____(mostly)____|__________________|
52 # |____________________________________________________|
# Ensure previous generated intermediate hash is aligned and merged with result for GHASH 8k+0
# EXT low_acc, low_acc, low_acc, #8
# EOR res_curr (8k+0), res_curr (8k+0), low_acc
60 # Increment and byte reverse counter in scalar registers and transfer to SIMD registers
61 # REV ctr32, rev_ctr32
62 # ORR ctr64, constctr96_top32, ctr32, LSL #32
63 # INS ctr_next.d[0], constctr96_bottom64 // Keeping this in scalar registers to free up space in SIMD RF
64 # INS ctr_next.d[1], ctr64X
# Do AES encryption/decryption on CTR block X and EOR it with input block X. A 256-bit key is used as the example below.
# A small trick is used here: load the input into scalar registers, EOR it with the last round key, and then transfer
# the result into SIMD registers. Given we are very constrained in our ASIMD registers this is quite important.
73 # LDR input_low, [ input_ptr ], #8
74 # LDR input_high, [ input_ptr ], #8
75 # EOR input_low, k14_low
76 # EOR input_high, k14_high
77 # INS res_curr.d[0], input_low
78 # INS res_curr.d[1], input_high
79 # AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr
80 # AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr
81 # AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr
82 # AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr
83 # AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr
84 # AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr
85 # AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr
86 # AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr
87 # AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr
88 # AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr
89 # AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr
90 # AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr
91 # AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr
93 # EOR res_curr, res_curr, ctr_curr
94 # ST1 { res_curr.16b }, [ output_ptr ], #16
97 # AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr
98 # AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr
99 # AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr
100 # AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr
101 # AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr
102 # AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr
103 # AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr
104 # AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr
105 # AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr
106 # AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr
107 # AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr
108 # AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr
109 # AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr
111 # LDR res_curr, [ input_ptr ], #16
112 # EOR res_curr, res_curr, ctr_curr
113 # MOV output_low, res_curr.d[0]
114 # MOV output_high, res_curr.d[1]
115 # EOR output_low, k14_low
116 # EOR output_high, k14_high
117 # STP output_low, output_high, [ output_ptr ], #16
120 # Do 128b karatsuba polynomial multiplication on block
121 # We only have 64b->128b polynomial multipliers, naively that means we need to do 4 64b multiplies to generate a 128b
124 # Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah,Bl) ^ Pmull(Al,Bh))<<64
126 # The idea behind Karatsuba multiplication is that we can do just 3 64b multiplies:
127 # Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah^Al,Bh^Bl) ^ Pmull(Ah,Bh) ^ Pmull(Al,Bl))<<64
129 # There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are
130 # multiplying with "twisted" powers of H
132 # Note: We can PMULL directly into the acc_x in first GHASH of the loop
133 # Note: For scheduling big cores we want to split the processing to happen over two loop iterations - otherwise the critical
134 # path latency dominates the performance
136 # This has a knock on effect on register pressure, so we have to be a bit more clever with our temporary registers
137 # than indicated here
138 # REV64 res_curr, res_curr
139 # INS t_m.d[0], res_curr.d[1]
140 # EOR t_m.8B, t_m.8B, res_curr.8B
141 # PMULL2 t_h, res_curr, HX
142 # PMULL t_l, res_curr, HX
143 # PMULL t_m, t_m, HX_k
144 # EOR acc_h, acc_h, t_h
145 # EOR acc_l, acc_l, t_l
146 # EOR acc_m, acc_m, t_m
148 # MODULO: take the partial accumulators (~representing sum of 256b multiplication results), from GHASH and do modulo reduction on them
149 # There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are doing modulo
150 # with a reversed constant
151 # EOR3 acc_m, acc_m, acc_l, acc_h // Finish off karatsuba processing
152 # PMULL t_mod, acc_h, mod_constant
153 # EXT acc_h, acc_h, acc_h, #8
154 # EOR3 acc_m, acc_m, t_mod, acc_h
155 # PMULL acc_h, acc_m, mod_constant
156 # EXT acc_m, acc_m, acc_m, #8
157 # EOR3 acc_l, acc_l, acc_m, acc_h
# Command-line handling:
#   $output  is the last argument if it looks like a file (it has an extension);
#   $flavour is the first argument if it does not look like a file (no dot).
# Either one is left undefined when the corresponding argument is absent.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Directory part of this script's own path, including the trailing separator
# (either '/' or '\').
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;

# Look for the translator next to this script first, then in ../../perlasm/.
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

# This generator only produces 64-bit code; an absent flavour is rejected too.
die "only for 64 bit" if $flavour !~ /64/;

# Pipe everything written to OUT through the translator, run with the same
# perl binary that is executing this script ($^X).  The open is checked so a
# failure to start the pipe is reported instead of being silently ignored.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
173 #include "arm_arch.h"
175 #if __ARM_MAX_ARCH__>=8
177 $code.=".arch armv8.2-a+crypto\n.text\n";
# Fixed general-purpose register assignments used by the kernel.
$input_ptr       = "x0";     # argument block
$constant_temp   = "x15";    # scratch used to build constants (e.g. the counter increment)
$modulo_constant = "x10";    # address of the GHASH reduction constant stored on the stack

# Input-range pointers and scalar temporaries: x4-x7 and x13-x14.
my ($end_input_ptr, $main_end_input_ptr, $temp0_x, $temp1_x) = map { "x$_" } 4 .. 7;
my ($temp2_x, $temp3_x) = map { "x$_" } 13 .. 14;

# Counter blocks live in v0-v7 and result blocks in v8-v15.  The same
# registers are named below in their .16b, bare vN, dN and qN spellings.
my ($ctr0b, $ctr1b, $ctr2b, $ctr3b, $ctr4b, $ctr5b, $ctr6b, $ctr7b,
    $res0b, $res1b, $res2b, $res3b, $res4b, $res5b, $res6b, $res7b)
    = map { "v$_.16b" } 0 .. 15;
my ($ctr0, $ctr1, $ctr2, $ctr3, $ctr4, $ctr5, $ctr6, $ctr7,
    $res0, $res1, $res2, $res3, $res4, $res5, $res6, $res7)
    = map { "v$_" } 0 .. 15;
my ($ctr0d, $ctr1d, $ctr2d, $ctr3d, $ctr4d, $ctr5d, $ctr6d, $ctr7d)
    = map { "d$_" } 0 .. 7;
my ($ctr0q, $ctr1q, $ctr2q, $ctr3q, $ctr4q, $ctr5q, $ctr6q, $ctr7q)
    = map { "q$_" } 0 .. 7;
my ($res0q, $res1q, $res2q, $res3q, $res4q, $res5q, $res6q, $res7q)
    = map { "q$_" } 8 .. 15;

# Temporaries for freshly loaded input blocks.  These are v8-v15 again,
# i.e. they share registers with $res0..$res7.
my ($ctr_t0, $ctr_t1, $ctr_t2, $ctr_t3, $ctr_t4, $ctr_t5, $ctr_t6, $ctr_t7)
    = map { "v$_" } 8 .. 15;
my ($ctr_t0b, $ctr_t1b, $ctr_t2b, $ctr_t3b, $ctr_t4b, $ctr_t5b, $ctr_t6b, $ctr_t7b)
    = map { "v$_.16b" } 8 .. 15;
my ($ctr_t0q, $ctr_t1q, $ctr_t2q, $ctr_t3q, $ctr_t4q, $ctr_t5q, $ctr_t6q, $ctr_t7q)
    = map { "q$_" } 8 .. 15;

# GHASH partial accumulators (high, mid, low) in v17-v19.
my ($acc_hb, $acc_mb, $acc_lb) = map { "v$_.16b" } 17 .. 19;
my ($acc_h,  $acc_m,  $acc_l)  = map { "v$_" }     17 .. 19;

# Hash-key powers.  H1..H4 and H5..H8 (with their "k" companions) are two
# sets of names for the same six registers, v20-v25.
my ($h1, $h12k, $h2, $h3, $h34k, $h4) = map { "v$_" } 20 .. 25;
my ($h5, $h56k, $h6, $h7, $h78k, $h8) = map { "v$_" } 20 .. 25;
my ($h1q, $h12kq, $h2q, $h3q, $h34kq, $h4q) = map { "q$_" } 20 .. 25;
my ($h5q, $h56kq, $h6q, $h7q, $h78kq, $h8q) = map { "q$_" } 20 .. 25;

# The modulo constant reuses the $t0 temporary ($t0/$t0d are declared
# elsewhere in this file).
my $mod_constantd = $t0d;
my $mod_constant  = $t0;

# Round keys.  Every group of three names maps onto the same registers,
# v26-v28, so rk0/rk3/rk6/rk9/rk12 are one register, and so on.
my ($rk0,  $rk1,  $rk2)  = map { "v$_.16b" } 26 .. 28;
my ($rk3,  $rk4,  $rk5)  = map { "v$_.16b" } 26 .. 28;
my ($rk6,  $rk7,  $rk8)  = map { "v$_.16b" } 26 .. 28;
my ($rk9,  $rk10, $rk11) = map { "v$_.16b" } 26 .. 28;
my ($rk12, $rk13, $rk14) = map { "v$_.16b" } 26 .. 28;
my ($rk0q,  $rk1q,  $rk2q)  = map { "q$_" } 26 .. 28;
my ($rk3q,  $rk4q,  $rk5q)  = map { "q$_" } 26 .. 28;
my ($rk6q,  $rk7q,  $rk8q)  = map { "q$_" } 26 .. 28;
my ($rk9q,  $rk10q, $rk11q) = map { "q$_" } 26 .. 28;
my ($rk12q, $rk13q, $rk14q) = map { "q$_" } 26 .. 28;
250 #########################################################################################
251 # size_t unroll8_eor3_aes_gcm_enc_128_kernel(const unsigned char *in,
253 # unsigned char *out,
255 # unsigned char ivec[16],
259 .global unroll8_eor3_aes_gcm_enc_128_kernel
260 .type unroll8_eor3_aes_gcm_enc_128_kernel
,%function
262 unroll8_eor3_aes_gcm_enc_128_kernel
:
263 AARCH64_VALID_CALL_TARGET
264 cbz x1
, .L128_enc_ret
265 stp d8
, d9
, [sp
, #-80]!
268 stp d10
, d11
, [sp
, #16]
269 stp d12
, d13
, [sp
, #32]
270 stp d14
, d15
, [sp
, #48]
271 mov x5
, #0xc200000000000000
272 stp x5
, xzr
, [sp
, #64]
273 add
$modulo_constant, sp
, #64
275 mov
$constant_temp, #0x100000000 @ set up counter increment
276 movi
$rctr_inc.16b
, #0x0
277 mov
$rctr_inc.d
[1], $constant_temp
278 lsr
$main_end_input_ptr, $bit_length, #3 @ byte_len
279 ld1
{ $ctr0b}, [$counter] @ CTR block
0
281 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
283 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
285 rev32
$rtmp_ctr.16b
, $ctr0.16b @ set up reversed counter
287 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
0
289 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
1
290 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
1
292 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
2
293 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
2
295 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
3
296 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
3
298 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
4
299 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
4
301 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
5
302 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
5
303 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
305 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
6
306 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
6
308 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
7
309 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
7
311 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
0
312 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
0
313 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
0
315 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
0
316 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
0
317 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
0
319 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
0
320 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
0
321 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
323 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
1
325 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
1
326 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
1
327 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
1
329 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
1
330 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
1
331 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
1
333 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
2
334 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
1
335 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
2
337 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
2
338 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
2
339 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
2
341 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
2
342 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
2
343 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
2
345 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
3
347 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
348 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
3
349 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
3
351 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
3
352 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
3
353 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
3
355 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
3
357 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
4
358 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
3
359 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
4
361 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
4
362 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
4
363 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
4
365 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
4
366 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
4
367 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
4
369 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
5
370 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
5
371 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
373 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
5
374 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
5
375 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
5
377 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
5
378 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
5
379 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
5
381 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
6
382 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
6
383 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
6
385 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
6
386 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
6
387 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
6
389 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
6
390 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
6
391 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
393 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
7
395 ld1
{ $acc_lb}, [$current_tag]
396 ext
$acc_lb, $acc_lb, $acc_lb, #8
397 rev64
$acc_lb, $acc_lb
399 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
7
401 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
7
402 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
7
403 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
7
405 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
7
406 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
7
407 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
7
409 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
410 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
411 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
413 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
414 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
415 ldr
$rk10q, [$cc, #160] @ load rk10
417 aese
$ctr3b, $rk9 @ AES block
8k
+11 - round
9
418 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
419 aese
$ctr2b, $rk9 @ AES block
8k
+10 - round
9
421 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
422 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
423 aese
$ctr6b, $rk9 @ AES block
8k
+14 - round
9
425 aese
$ctr4b, $rk9 @ AES block
8k
+12 - round
9
426 add
$main_end_input_ptr, $main_end_input_ptr, $input_ptr
427 aese
$ctr0b, $rk9 @ AES block
8k
+8 - round
9
429 aese
$ctr7b, $rk9 @ AES block
8k
+15 - round
9
430 aese
$ctr5b, $rk9 @ AES block
8k
+13 - round
9
431 aese
$ctr1b, $rk9 @ AES block
8k
+9 - round
9
433 add
$end_input_ptr, $input_ptr, $bit_length, lsr
#3 @ end_input_ptr
434 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
435 b
.ge .L128_enc_tail @ handle tail
437 ldp
$ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext
439 ldp
$ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext
441 ldp
$ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
443 ldp
$ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
444 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
446 eor3
$res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block
0 - result
447 rev32
$ctr0.16b
, $rtmp_ctr.16b @ CTR block
8
448 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8
450 eor3
$res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block
1 - result
451 stp
$res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result
453 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
9
454 eor3
$res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block
5 - result
455 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
9
457 eor3
$res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block
2 - result
458 eor3
$res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block
6 - result
459 eor3
$res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block
4 - result
461 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
10
462 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
10
464 eor3
$res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block
3 - result
465 eor3
$res7b, $ctr_t7b, $ctr7b,$rk10 @ AES block
7 - result
466 stp
$res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result
468 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
11
469 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
11
470 stp
$res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
472 stp
$res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
474 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
12
475 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
12
476 b
.ge .L128_enc_prepretail @
do prepretail
478 .L128_enc_main_loop
: @ main
loop start
479 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
480 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
481 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
482 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
483 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
484 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
486 rev64
$res1b, $res1b @ GHASH block
8k
+1
487 rev64
$res0b, $res0b @ GHASH block
8k
488 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
489 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
490 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
491 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
493 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
494 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
495 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
497 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
498 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
499 rev64
$res5b, $res5b @ GHASH block
8k
+5 (t0
, t1
, t2
and t3 free
)
500 rev64
$res3b, $res3b @ GHASH block
8k
+3
502 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
503 eor
$res0b, $res0b, $acc_lb @ PRE
1
504 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
506 rev64
$res7b, $res7b @ GHASH block
8k
+7 (t0
, t1
, t2
and t3 free
)
508 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
509 rev64
$res2b, $res2b @ GHASH block
8k
+2
510 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
512 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
513 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
514 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
516 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
517 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
518 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
520 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
521 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
522 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
523 ldr
$h4q, [$current_tag, #112] @ load h3l | h3h
524 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
525 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
527 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
528 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
529 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
531 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
532 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
533 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
535 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
536 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
537 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
539 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
540 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
541 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
543 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
544 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
545 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
547 eor3
$acc_hb, $acc_hb, $t1.16b
,$t2.16b @ GHASH block
8k
+2, 8k
+3 - high
548 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
549 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
551 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
552 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
553 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
555 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
556 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
557 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
559 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
560 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
561 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
563 rev64
$res6b, $res6b @ GHASH block
8k
+6 (t0
, t1
, and t2 free
)
564 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
566 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
567 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
568 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
570 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
571 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
572 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
574 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
575 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
576 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
578 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
579 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
580 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
582 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
583 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
584 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
585 rev64
$res4b, $res4b @ GHASH block
8k
+4 (t0
, t1
, and t2 free
)
587 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
588 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
589 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
591 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
592 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
593 ldr
$h2q, [$current_tag, #64] @ load h1l | h1h
594 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
595 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
596 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
598 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
599 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
601 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
602 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
604 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
605 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
607 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
608 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
610 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
611 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
612 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
614 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
615 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
616 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
618 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
619 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
620 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
622 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
623 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
624 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
626 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
627 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
628 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
630 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
631 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
632 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
634 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
635 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
636 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
638 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
639 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
641 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
642 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
643 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
645 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
646 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
647 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
649 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
650 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
651 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
653 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
654 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
656 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
657 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
658 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
660 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
661 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
662 ldp
$ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext
664 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
665 rev32
$h1.16b
, $rtmp_ctr.16b @ CTR block
8k
+16
666 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+16
668 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
669 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
670 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
672 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
673 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
674 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
676 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
677 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
678 ldp
$ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext
680 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
681 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
682 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
684 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
685 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
686 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
688 rev32
$h2.16b
, $rtmp_ctr.16b @ CTR block
8k
+17
689 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
691 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
692 ldp
$ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext
693 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+17
695 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
696 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
697 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
699 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
700 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
701 ldr
$rk10q, [$cc, #160] @ load rk10
703 ext
$t12.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
704 rev32
$h3.16b
, $rtmp_ctr.16b @ CTR block
8k
+18
705 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+18
706 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
708 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
709 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
710 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
712 aese
$ctr2b, $rk9 @ AES block
8k
+10 - round
9
713 aese
$ctr4b, $rk9 @ AES block
8k
+12 - round
9
714 aese
$ctr1b, $rk9 @ AES block
8k
+9 - round
9
716 ldp
$ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext
717 rev32
$h4.16b
, $rtmp_ctr.16b @ CTR block
8k
+19
718 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+19
720 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
721 eor3
$res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block
4 - result
722 aese
$ctr7b, $rk9 @ AES block
8k
+15 - round
9
724 aese
$ctr6b, $rk9 @ AES block
8k
+14 - round
9
725 aese
$ctr3b, $rk9 @ AES block
8k
+11 - round
9
727 eor3
$res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block
8k
+10 - result
729 mov
$ctr2.16b
, $h3.16b @ CTR block
8k
+18
730 aese
$ctr0b, $rk9 @ AES block
8k
+8 - round
9
732 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
8k
+20
733 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+20
735 eor3
$res7b, $ctr_t7b, $ctr7b, $rk10 @ AES block
7 - result
736 aese
$ctr5b, $rk9 @ AES block
8k
+13 - round
9
737 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
739 eor3
$res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block
8k
+9 - result
740 eor3
$res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block
8k
+11 - result
741 mov
$ctr3.16b
, $h4.16b @ CTR block
8k
+19
743 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
744 eor3
$res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block
5 - result
745 mov
$ctr1.16b
, $h2.16b @ CTR block
8k
+17
747 eor3
$res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block
8k
+8 - result
748 mov
$ctr0.16b
, $h1.16b @ CTR block
8k
+16
749 stp
$res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
751 stp
$res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
752 eor3
$res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block
6 - result
754 stp
$res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
755 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
757 stp
$res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
758 b
.lt .L128_enc_main_loop
760 .L128_enc_prepretail
: @ PREPRETAIL
761 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
762 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
763 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
764 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
765 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
766 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
768 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
769 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
770 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
771 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
772 rev64
$res0b, $res0b @ GHASH block
8k
773 rev64
$res1b, $res1b @ GHASH block
8k
+1
775 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
776 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
777 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
778 rev64
$res3b, $res3b @ GHASH block
8k
+3
780 rev64
$res2b, $res2b @ GHASH block
8k
+2
781 eor
$res0b, $res0b, $acc_lb @ PRE
1
783 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
785 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
786 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
787 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
789 rev64
$res5b, $res5b @ GHASH block
8k
+5 (t0
, t1
, t2
and t3 free
)
790 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
792 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
793 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
794 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
796 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
797 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
799 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
800 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
802 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
803 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
805 rev64
$res4b, $res4b @ GHASH block
8k
+4 (t0
, t1
, and t2 free
)
806 rev64
$res7b, $res7b @ GHASH block
8k
+7 (t0
, t1
, t2
and t3 free
)
808 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
810 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
812 rev64
$res6b, $res6b @ GHASH block
8k
+6 (t0
, t1
, and t2 free
)
814 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
816 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
817 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
819 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
820 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
822 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
823 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
825 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
826 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
827 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
829 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
830 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
832 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
833 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
834 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
836 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
837 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
839 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
840 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
841 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
842 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
844 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
845 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
846 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
848 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
849 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
851 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
852 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
854 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
855 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
856 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
857 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
859 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
860 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
862 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
863 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
864 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
866 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
867 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
868 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
870 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
871 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
873 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
874 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
875 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
877 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
878 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
879 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
880 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
881 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
882 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
884 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
885 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
886 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
888 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
889 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
890 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
892 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
893 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
895 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
896 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
897 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
899 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
900 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
901 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
903 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
904 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
905 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
907 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
908 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
909 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
911 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
912 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
913 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
915 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
916 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
917 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
919 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
920 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
922 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
923 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
924 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
926 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
927 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
928 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
930 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
931 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
932 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
934 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
935 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
936 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
938 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
939 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
941 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
942 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
943 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
945 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
946 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
948 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
949 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
950 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
952 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
953 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
954 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
956 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
957 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
958 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
960 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
961 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
962 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
964 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
965 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
966 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
967 ext
$t12.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
969 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
970 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
971 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
973 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
974 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
976 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
977 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
979 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
980 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
981 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
982 ext
$acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
984 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
985 eor3
$acc_lb, $acc_lb, $acc_hb, $acc_mb @ MODULO
- fold into low
986 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
988 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
989 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
990 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
992 ldr
$rk10q, [$cc, #160] @ load rk10
993 aese
$ctr6b, $rk9 @ AES block
8k
+14 - round
9
994 aese
$ctr2b, $rk9 @ AES block
8k
+10 - round
9
996 aese
$ctr0b, $rk9 @ AES block
8k
+8 - round
9
997 aese
$ctr1b, $rk9 @ AES block
8k
+9 - round
9
999 aese
$ctr3b, $rk9 @ AES block
8k
+11 - round
9
1000 aese
$ctr5b, $rk9 @ AES block
8k
+13 - round
9
1002 aese
$ctr4b, $rk9 @ AES block
8k
+12 - round
9
1003 aese
$ctr7b, $rk9 @ AES block
8k
+15 - round
9
1004 .L128_enc_tail
: @ TAIL
1006 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
1007 ldr
$ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext
1010 ldp
$h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h and h6k | h5k
1011 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
1013 eor3
$res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block
8k
+8 - result
1014 ext
$t0.16b
, $acc_lb, $acc_lb, #8 @ prepare final partial tag
1015 ldp
$h6q, $h7q, [$current_tag, #160] @ load h6l | h6h and h7l | h7h
1016 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
1017 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
1019 ldp
$h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k and h8l | h8h
1020 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
1021 cmp $main_end_input_ptr, #112
1022 b
.gt .L128_enc_blocks_more_than_7
1028 cmp $main_end_input_ptr, #96
1029 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1038 b
.gt .L128_enc_blocks_more_than_6
1041 cmp $main_end_input_ptr, #80
1043 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1049 b
.gt .L128_enc_blocks_more_than_5
1051 cmp $main_end_input_ptr, #64
1052 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1059 b
.gt .L128_enc_blocks_more_than_4
1062 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1066 cmp $main_end_input_ptr, #48
1067 b
.gt .L128_enc_blocks_more_than_3
1069 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1073 cmp $main_end_input_ptr, #32
1074 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
1075 b
.gt .L128_enc_blocks_more_than_2
1077 cmp $main_end_input_ptr, #16
1079 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1081 b
.gt .L128_enc_blocks_more_than_1
1083 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
1084 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
1085 b
.L128_enc_blocks_less_than_1
1086 .L128_enc_blocks_more_than_7
: @ blocks left
> 7
1087 st1
{ $res1b}, [$output_ptr], #16 @ AES final-7 block - store result
1089 rev64
$res0b, $res1b @ GHASH final
-7 block
1090 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext
1092 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1094 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-7 block
- mid
1096 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH final
-7 block
- high
1098 ins
$acc_m.d
[0], $h78k.d
[1] @ GHASH final
-7 block
- mid
1100 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-7 block
- mid
1101 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1103 eor3
$res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final
-6 block
- result
1105 pmull
$acc_m.1q
, $rk4v.1d
, $acc_m.1d @ GHASH final
-7 block
- mid
1106 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH final
-7 block
- low
1107 .L128_enc_blocks_more_than_6
: @ blocks left
> 6
1109 st1
{ $res1b}, [$output_ptr], #16 @ AES final-6 block - store result
1111 rev64
$res0b, $res1b @ GHASH final
-6 block
1112 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext
1114 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1116 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-6 block
- mid
1118 eor3
$res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final
-5 block
- result
1119 pmull
$rk3q1, $res0.1d
, $h7.1d @ GHASH final
-6 block
- low
1121 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-6 block
- mid
1122 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1124 pmull
$rk4v.1q
, $rk4v.1d
, $h78k.1d @ GHASH final
-6 block
- mid
1125 pmull2
$rk2q1, $res0.2d
, $h7.2d @ GHASH final
-6 block
- high
1127 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-6 block
- low
1129 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-6 block
- mid
1130 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-6 block
- high
1131 .L128_enc_blocks_more_than_5
: @ blocks left
> 5
1133 st1
{ $res1b}, [$output_ptr], #16 @ AES final-5 block - store result
1135 rev64
$res0b, $res1b @ GHASH final
-5 block
1137 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1139 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-5 block
- mid
1140 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext
1141 pmull2
$rk2q1, $res0.2d
, $h6.2d @ GHASH final
-5 block
- high
1143 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-5 block
- high
1145 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-5 block
- mid
1147 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-5 block
- mid
1149 eor3
$res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final
-4 block
- result
1150 pmull
$rk3q1, $res0.1d
, $h6.1d @ GHASH final
-5 block
- low
1151 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1153 pmull2
$rk4v.1q
, $rk4v.2d
, $h56k.2d @ GHASH final
-5 block
- mid
1154 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-5 block
- low
1156 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-5 block
- mid
1157 .L128_enc_blocks_more_than_4
: @ blocks left
> 4
1159 st1
{ $res1b}, [$output_ptr], #16 @ AES final-4 block - store result
1161 rev64
$res0b, $res1b @ GHASH final
-4 block
1163 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext
1165 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1167 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-4 block
- mid
1168 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1169 pmull2
$rk2q1, $res0.2d
, $h5.2d @ GHASH final
-4 block
- high
1171 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-4 block
- mid
1173 pmull
$rk3q1, $res0.1d
, $h5.1d @ GHASH final
-4 block
- low
1175 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-4 block
- high
1176 pmull
$rk4v.1q
, $rk4v.1d
, $h56k.1d @ GHASH final
-4 block
- mid
1178 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-4 block
- low
1180 eor3
$res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final
-3 block
- result
1181 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-4 block
- mid
1182 .L128_enc_blocks_more_than_3
: @ blocks left
> 3
1184 st1
{ $res1b}, [$output_ptr], #16 @ AES final-3 block - store result
1186 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
1187 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
1189 rev64
$res0b, $res1b @ GHASH final
-3 block
1191 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1192 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1194 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-3 block
- mid
1195 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
1196 pmull
$rk3q1, $res0.1d
, $h4.1d @ GHASH final
-3 block
- low
1198 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext
1200 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-3 block
- mid
1202 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-3 block
- mid
1203 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-3 block
- low
1205 eor3
$res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final
-2 block
- result
1207 pmull2
$rk4v.1q
, $rk4v.2d
, $h34k.2d @ GHASH final
-3 block
- mid
1208 pmull2
$rk2q1, $res0.2d
, $h4.2d @ GHASH final
-3 block
- high
1210 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-3 block
- mid
1211 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-3 block
- high
1212 .L128_enc_blocks_more_than_2
: @ blocks left
> 2
1214 st1
{ $res1b}, [$output_ptr], #16 @ AES final-2 block - store result
1216 rev64
$res0b, $res1b @ GHASH final
-2 block
1218 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1220 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext
1222 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-2 block
- mid
1223 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
1224 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
1225 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1227 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-2 block
- mid
1228 eor3
$res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final
-1 block
- result
1230 pmull2
$rk2q1, $res0.2d
, $h3.2d @ GHASH final
-2 block
- high
1232 pmull
$rk3q1, $res0.1d
, $h3.1d @ GHASH final
-2 block
- low
1233 pmull
$rk4v.1q
, $rk4v.1d
, $h34k.1d @ GHASH final
-2 block
- mid
1235 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-2 block
- high
1237 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-2 block
- mid
1238 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-2 block
- low
1239 .L128_enc_blocks_more_than_1
: @ blocks left
> 1
1241 st1
{ $res1b}, [$output_ptr], #16 @ AES final-1 block - store result
1243 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
1244 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
1245 rev64
$res0b, $res1b @ GHASH final
-1 block
1246 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext
1248 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1250 movi
$t0.8b
, #0 @ suppress further partial tag feed in
1251 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-1 block
- mid
1252 eor3
$res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block
- result
1254 pmull2
$rk2q1, $res0.2d
, $h2.2d @ GHASH final
-1 block
- high
1256 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-1 block
- mid
1258 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
1260 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-1 block
- mid
1262 pmull
$rk3q1, $res0.1d
, $h2.1d @ GHASH final
-1 block
- low
1263 pmull2
$rk4v.1q
, $rk4v.2d
, $h12k.2d @ GHASH final
-1 block
- mid
1265 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-1 block
- high
1267 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-1 block
- mid
1268 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-1 block
- low
1269 .L128_enc_blocks_less_than_1
: @ blocks left
<= 1
1271 rev32
$rtmp_ctr.16b
, $rtmp_ctr.16b
1272 str
$rtmp_ctrq, [$counter] @ store the updated counter
1273 and $bit_length, $bit_length, #127 @ bit_length %= 128
1275 sub $bit_length, $bit_length, #128 @ bit_length -= 128
1277 neg
$bit_length, $bit_length @ bit_length
= 128 - #bits in input (in range [1,128])
1279 mvn
$temp0_x, xzr @ temp0_x
= 0xffffffffffffffff
1280 ld1
{ $rk0}, [$output_ptr] @ load existing bytes where the possibly partial
last block is to be stored
1281 and $bit_length, $bit_length, #127 @ bit_length %= 128
1283 lsr
$temp0_x, $temp0_x, $bit_length @ temp0_x is mask
for top
64b of
last block
1284 mvn
$temp1_x, xzr @ temp1_x
= 0xffffffffffffffff
1285 cmp $bit_length, #64
1287 csel
$temp2_x, $temp1_x, $temp0_x, lt
1288 csel
$temp3_x, $temp0_x, xzr
, lt
1290 mov
$ctr0.d
[1], $temp3_x
1291 mov
$ctr0.d
[0], $temp2_x @ ctr0b is mask
for last block
1293 and $res1b, $res1b, $ctr0b @ possibly partial
last block has zeroes
in highest bits
1295 rev64
$res0b, $res1b @ GHASH final block
1297 bif
$res1b, $rk0, $ctr0b @ insert existing bytes
in top end of result before storing
1298 st1
{ $res1b}, [$output_ptr] @ store all
16B
1300 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
1302 ins
$t0.d
[0], $res0.d
[1] @ GHASH final block
- mid
1304 eor
$t0.8b
, $t0.8b
, $res0.8b @ GHASH final block
- mid
1305 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
1306 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
1308 pmull
$t0.1q
, $t0.1d
, $h12k.1d @ GHASH final block
- mid
1310 pmull2
$rk2q1, $res0.2d
, $h1.2d @ GHASH final block
- high
1311 eor
$acc_mb, $acc_mb, $t0.16b @ GHASH final block
- mid
1312 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
1314 pmull
$rk3q1, $res0.1d
, $h1.1d @ GHASH final block
- low
1316 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final block
- high
1318 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final block
- low
1320 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
1321 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
1323 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
1325 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
1327 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
1328 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
1330 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
1331 ext
$acc_lb, $acc_lb, $acc_lb, #8
1332 rev64
$acc_lb, $acc_lb
1333 st1
{ $acc_l.16b
}, [$current_tag]
1334 lsr x0
, $bit_length, #3 @ return sizes
1336 ldp d10
, d11
, [sp
, #16]
1337 ldp d12
, d13
, [sp
, #32]
1338 ldp d14
, d15
, [sp
, #48]
1339 ldp d8
, d9
, [sp
], #80
1345 .size unroll8_eor3_aes_gcm_enc_128_kernel
,.-unroll8_eor3_aes_gcm_enc_128_kernel
1348 #########################################################################################
1349 # size_t unroll8_eor3_aes_gcm_dec_128_kernel(const unsigned char *in,
1351 # unsigned char *out,
1353 # unsigned char ivec[16],
1357 .global unroll8_eor3_aes_gcm_dec_128_kernel
1358 .type unroll8_eor3_aes_gcm_dec_128_kernel
,%function
1360 unroll8_eor3_aes_gcm_dec_128_kernel
:
1361 AARCH64_VALID_CALL_TARGET
1362 cbz x1
, .L128_dec_ret
1363 stp d8
, d9
, [sp
, #-80]!
1366 stp d10
, d11
, [sp
, #16]
1367 stp d12
, d13
, [sp
, #32]
1368 stp d14
, d15
, [sp
, #48]
1369 mov x5
, #0xc200000000000000
1370 stp x5
, xzr
, [sp
, #64]
1371 add
$modulo_constant, sp
, #64
1373 lsr
$main_end_input_ptr, $bit_length, #3 @ byte_len
1374 ld1
{ $ctr0b}, [$counter] @ CTR block
0
1376 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
1377 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
1379 mov
$constant_temp, #0x100000000 @ set up counter increment
1380 movi
$rctr_inc.16b
, #0x0
1381 mov
$rctr_inc.d
[1], $constant_temp
1382 ld1
{ $acc_lb}, [$current_tag]
1383 ext
$acc_lb, $acc_lb, $acc_lb, #8
1384 rev64
$acc_lb, $acc_lb
1386 rev32
$rtmp_ctr.16b
, $ctr0.16b @ set up reversed counter
1388 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
0
1390 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
0
1392 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
1
1393 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
1
1395 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
1397 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
2
1398 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
2
1399 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
0
1401 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
3
1402 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
3
1404 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
1
1405 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
1
1407 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
4
1408 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
4
1410 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
5
1411 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
5
1413 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
0
1415 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
6
1416 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
6
1417 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
0
1419 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
0
1420 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
0
1422 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
7
1424 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
0
1425 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
1
1427 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
0
1429 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
1431 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
1
1432 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
1
1434 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
1
1435 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
1
1437 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
2
1438 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
2
1439 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
1
1441 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
2
1442 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
2
1443 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
2
1445 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
2
1446 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
2
1447 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
2
1449 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
3
1450 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
3
1452 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
1453 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
3
1455 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
3
1456 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
3
1458 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
3
1459 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
3
1461 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
4
1462 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
4
1463 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
3
1465 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
4
1466 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
4
1467 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
4
1469 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
4
1470 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
4
1471 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
4
1473 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
1474 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
5
1475 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
5
1477 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
5
1478 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
5
1480 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
5
1481 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
5
1483 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
5
1485 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
6
1486 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
6
1487 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
5
1489 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
6
1490 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
6
1491 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
6
1493 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
6
1494 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
6
1495 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
6
1497 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
7
1498 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
7
1499 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
7
1501 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
7
1502 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
7
1503 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
1505 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
7
1506 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
7
1507 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
7
1509 add
$main_end_input_ptr, $main_end_input_ptr, $input_ptr
1510 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
7
1512 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
8
1513 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
8
1515 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
8
1516 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
8
1517 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
8
1519 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
8
1520 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
8
1521 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
8
1523 aese
$ctr0b, $rk9 @ AES block
0 - round
9
1524 aese
$ctr1b, $rk9 @ AES block
1 - round
9
1525 aese
$ctr6b, $rk9 @ AES block
6 - round
9
1527 ldr
$rk10q, [$cc, #160] @ load rk10
1528 aese
$ctr4b, $rk9 @ AES block
4 - round
9
1529 aese
$ctr3b, $rk9 @ AES block
3 - round
9
1531 aese
$ctr2b, $rk9 @ AES block
2 - round
9
1532 aese
$ctr5b, $rk9 @ AES block
5 - round
9
1533 aese
$ctr7b, $rk9 @ AES block
7 - round
9
1535 add
$end_input_ptr, $input_ptr, $bit_length, lsr
#3 @ end_input_ptr
1536 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
1537 b
.ge .L128_dec_tail @ handle tail
1539 ldp
$res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext
1541 eor3
$ctr0b, $res0b, $ctr0b, $rk10 @ AES block
0 - result
1542 eor3
$ctr1b, $res1b, $ctr1b, $rk10 @ AES block
1 - result
1543 stp
$ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result
1545 rev32
$ctr0.16b
, $rtmp_ctr.16b @ CTR block
8
1546 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8
1547 ldp
$res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext
1549 ldp
$res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext
1551 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
9
1552 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
9
1553 ldp
$res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext
1555 eor3
$ctr3b, $res3b, $ctr3b, $rk10 @ AES block
3 - result
1556 eor3
$ctr2b, $res2b, $ctr2b, $rk10 @ AES block
2 - result
1557 stp
$ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result
1559 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
10
1560 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
10
1562 eor3
$ctr6b, $res6b, $ctr6b, $rk10 @ AES block
6 - result
1564 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
11
1565 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
11
1567 eor3
$ctr4b, $res4b, $ctr4b, $rk10 @ AES block
4 - result
1568 eor3
$ctr5b, $res5b, $ctr5b, $rk10 @ AES block
5 - result
1569 stp
$ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result
1571 eor3
$ctr7b, $res7b, $ctr7b, $rk10 @ AES block
7 - result
1572 stp
$ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result
1573 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
12
1575 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
1576 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
12
1577 b
.ge .L128_dec_prepretail @
do prepretail
1579 .L128_dec_main_loop
: @ main
loop start
1580 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
1581 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
1582 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
1583 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
1585 rev64
$res1b, $res1b @ GHASH block
8k
+1
1586 rev64
$res0b, $res0b @ GHASH block
8k
1587 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
1589 rev64
$res6b, $res6b @ GHASH block
8k
+6
1590 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
1591 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
1592 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
1593 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
1595 eor
$res0b, $res0b, $acc_lb @ PRE
1
1596 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
1597 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
1599 rev64
$res2b, $res2b @ GHASH block
8k
+2
1600 rev64
$res4b, $res4b @ GHASH block
8k
+4
1601 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
1603 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
1604 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
1605 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
1606 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
1608 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
1609 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
1610 rev64
$res3b, $res3b @ GHASH block
8k
+3
1612 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
1613 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
1614 rev64
$res5b, $res5b @ GHASH block
8k
+5
1616 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
1617 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
1618 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
1620 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
1621 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
1622 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
1624 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
1625 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
1626 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
1628 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
1629 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
1630 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
1632 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
1633 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
1634 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
1636 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
1637 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
1638 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
1640 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
1641 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
1642 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
1644 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
1645 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
1646 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
1648 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
1649 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
1650 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
1651 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
1652 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
1653 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
1655 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
1656 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
1657 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
1659 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
1660 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
1661 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
1663 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
1664 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
1665 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
1667 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
1668 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
1669 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
1670 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
1671 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
1672 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
1674 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
1675 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
1676 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
1678 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
1679 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
1680 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
1682 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
1683 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
1684 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
1686 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
1687 rev64
$res7b, $res7b @ GHASH block
8k
+7
1688 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
1690 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
1691 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
1692 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
1694 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
1695 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
1696 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
1697 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
1699 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
1700 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
1701 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
1703 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
1704 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
1705 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
1707 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
1708 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
1709 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
1711 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
1712 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
1713 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
1715 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
1716 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
1717 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
1719 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
1720 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
1721 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
1723 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
1724 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
1725 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
1727 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
1728 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
1729 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
1731 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
1732 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
1733 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
1735 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
1736 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
1737 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
1739 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
1740 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
1741 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
1743 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
1744 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
1745 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
1747 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
1748 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
1749 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
1751 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
1752 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
1753 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
1755 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
1756 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
1757 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
1759 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
1760 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
1761 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
1763 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
1764 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
1765 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
1767 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
1768 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
1769 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
1771 rev32
$h1.16b
, $rtmp_ctr.16b @ CTR block
8k
+16
1772 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
1773 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+16
1775 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
1776 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
1777 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
1779 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
1780 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
1781 rev32
$h2.16b
, $rtmp_ctr.16b @ CTR block
8k
+17
1783 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
1784 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
1785 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
1787 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
1788 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
1789 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+17
1791 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
1792 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
1793 ldp
$res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext
1795 ldp
$res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext
1796 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
1797 rev32
$h3.16b
, $rtmp_ctr.16b @ CTR block
8k
+18
1799 ldp
$res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext
1800 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
1801 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
1803 ldp
$res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext
1804 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
1805 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+18
1807 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
1808 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
1809 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
1811 aese
$ctr0b, $rk9 @ AES block
8k
+8 - round
9
1812 aese
$ctr1b, $rk9 @ AES block
8k
+9 - round
9
1813 ldr
$rk10q, [$cc, #160] @ load rk10
1815 aese
$ctr6b, $rk9 @ AES block
8k
+14 - round
9
1816 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
1817 aese
$ctr2b, $rk9 @ AES block
8k
+10 - round
9
1819 aese
$ctr7b, $rk9 @ AES block
8k
+15 - round
9
1820 aese
$ctr4b, $rk9 @ AES block
8k
+12 - round
9
1821 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
1823 rev32
$h4.16b
, $rtmp_ctr.16b @ CTR block
8k
+19
1824 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+19
1826 aese
$ctr3b, $rk9 @ AES block
8k
+11 - round
9
1827 aese
$ctr5b, $rk9 @ AES block
8k
+13 - round
9
1828 eor3
$ctr1b, $res1b, $ctr1b, $rk10 @ AES block
8k
+9 - result
1830 eor3
$ctr0b, $res0b, $ctr0b, $rk10 @ AES block
8k
+8 - result
1831 eor3
$ctr7b, $res7b, $ctr7b, $rk10 @ AES block
8k
+15 - result
1832 eor3
$ctr6b, $res6b, $ctr6b, $rk10 @ AES block
8k
+14 - result
1834 eor3
$ctr2b, $res2b, $ctr2b, $rk10 @ AES block
8k
+10 - result
1835 stp
$ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
1836 mov
$ctr1.16b
, $h2.16b @ CTR block
8k
+17
1838 eor3
$ctr4b, $res4b, $ctr4b, $rk10 @ AES block
8k
+12 - result
1839 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
1840 mov
$ctr0.16b
, $h1.16b @ CTR block
8k
+16
1842 eor3
$ctr3b, $res3b, $ctr3b, $rk10 @ AES block
8k
+11 - result
1843 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
1844 stp
$ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
1846 eor3
$ctr5b, $res5b, $ctr5b, $rk10 @ AES block
8k
+13 - result
1847 mov
$ctr2.16b
, $h3.16b @ CTR block
8k
+18
1849 stp
$ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
1850 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
8k
+20
1851 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+20
1853 stp
$ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
1854 mov
$ctr3.16b
, $h4.16b @ CTR block
8k
+19
1855 b
.lt .L128_dec_main_loop
1857 .L128_dec_prepretail
: @ PREPRETAIL
1858 rev64
$res3b, $res3b @ GHASH block
8k
+3
1859 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
1860 rev64
$res0b, $res0b @ GHASH block
8k
1862 rev64
$res2b, $res2b @ GHASH block
8k
+2
1863 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
1864 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
1866 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
1867 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
1868 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
1869 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
1870 eor
$res0b, $res0b, $acc_lb @ PRE
1
1871 rev64
$res1b, $res1b @ GHASH block
8k
+1
1873 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
1874 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
1875 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
1876 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
1877 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
1878 rev64
$res5b, $res5b @ GHASH block
8k
+5
1880 rev64
$res4b, $res4b @ GHASH block
8k
+4
1882 rev64
$res6b, $res6b @ GHASH block
8k
+6
1884 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
1885 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
1886 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
1887 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
1889 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
1890 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
1891 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
1893 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
1894 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
1895 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
1897 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
1898 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
1899 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
1901 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
1902 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
1903 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
1905 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
1906 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
1907 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
1909 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
1910 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
1911 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
1913 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
1914 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
1915 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
1917 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
1918 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
1919 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
1921 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
1922 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
1923 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
1925 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
1926 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
1927 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
1929 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
1930 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
1931 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
1933 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
1934 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
1935 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
1937 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
1938 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
1939 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
1940 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
1941 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
1942 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
1944 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
1945 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
1946 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
1948 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
1949 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
1950 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
1951 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
1952 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
1954 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
1955 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
1956 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
1958 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
1959 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
1960 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
1962 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
1963 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
1964 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
1966 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
1967 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
1968 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
1970 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
1971 rev64
$res7b, $res7b @ GHASH block
8k
+7
1972 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
1974 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
1975 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
1976 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
1977 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
1979 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
1980 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
1981 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
1983 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
1984 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
1985 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
1987 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
1988 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
1989 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
1991 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
1992 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
1993 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
1995 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
1996 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
1997 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
1999 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
2000 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
2001 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
2003 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
2004 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
2005 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
2007 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
2008 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
2009 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
2011 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
2012 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
2013 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
2015 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
2016 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
2017 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
2019 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
2020 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
2021 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
2023 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
2024 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
2025 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
2027 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
2028 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
2029 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
2031 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
2032 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
2033 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
2035 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
2036 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
2037 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
2039 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
2040 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
2041 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
2043 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
2044 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
2045 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
2047 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
2048 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
2049 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
2051 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
2052 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
2053 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
2055 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
2056 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
2057 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
2059 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
2060 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
2061 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
2063 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
2064 ldr
$rk10q, [$cc, #160] @ load rk10
2066 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
2067 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
2069 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
2070 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
2071 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
2073 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
2074 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
2075 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
2077 aese
$ctr6b, $rk9 @ AES block
8k
+14 - round
9
2078 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
2079 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
2081 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
2082 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
2083 aese
$ctr2b, $rk9 @ AES block
8k
+10 - round
9
2085 aese
$ctr3b, $rk9 @ AES block
8k
+11 - round
9
2086 aese
$ctr5b, $rk9 @ AES block
8k
+13 - round
9
2087 aese
$ctr0b, $rk9 @ AES block
8k
+8 - round
9
2089 aese
$ctr4b, $rk9 @ AES block
8k
+12 - round
9
2090 aese
$ctr1b, $rk9 @ AES block
8k
+9 - round
9
2091 aese
$ctr7b, $rk9 @ AES block
8k
+15 - round
9
2093 .L128_dec_tail
: @ TAIL
2096 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
2098 cmp $main_end_input_ptr, #112
2100 ldp
$h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k, h8l | h8h
2101 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
2102 ldr
$res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext
2104 ldp
$h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h, h6k | h5k
2105 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
2106 ext
$t0.16b
, $acc_lb, $acc_lb, #8 @ prepare final partial tag
2108 ldp
$h6q, $h7q, [$current_tag, #160] @ load h6l | h6h, h7l | h7h
2109 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
2110 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
2112 eor3
$res4b, $res1b, $ctr0b, $t1.16b @ AES block
8k
+8 - result
2113 b
.gt .L128_dec_blocks_more_than_7
2115 cmp $main_end_input_ptr, #96
2128 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2129 b
.gt .L128_dec_blocks_more_than_6
2131 cmp $main_end_input_ptr, #80
2132 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2140 b
.gt .L128_dec_blocks_more_than_5
2142 cmp $main_end_input_ptr, #64
2149 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2150 b
.gt .L128_dec_blocks_more_than_4
2152 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2157 cmp $main_end_input_ptr, #48
2158 b
.gt .L128_dec_blocks_more_than_3
2160 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2162 cmp $main_end_input_ptr, #32
2164 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
2166 b
.gt .L128_dec_blocks_more_than_2
2168 cmp $main_end_input_ptr, #16
2171 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2172 b
.gt L128_dec_blocks_more_than_1
2174 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
2175 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
2176 b
.L128_dec_blocks_less_than_1
2177 .L128_dec_blocks_more_than_7
: @ blocks left
> 7
2178 rev64
$res0b, $res1b @ GHASH final
-7 block
2180 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2182 ins
$acc_m.d
[0], $h78k.d
[1] @ GHASH final
-7 block
- mid
2184 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH final
-7 block
- low
2185 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-7 block
- mid
2187 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2188 ldr
$res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext
2190 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-7 block
- mid
2192 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH final
-7 block
- high
2193 st1
{ $res4b}, [$output_ptr], #16 @ AES final-7 block - store result
2194 eor3
$res4b, $res1b, $ctr1b, $t1.16b @ AES final
-6 block
- result
2196 pmull
$acc_m.1q
, $rk4v.1d
, $acc_m.1d @ GHASH final
-7 block
- mid
2197 .L128_dec_blocks_more_than_6
: @ blocks left
> 6
2199 rev64
$res0b, $res1b @ GHASH final
-6 block
2201 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2203 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-6 block
- mid
2205 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-6 block
- mid
2207 pmull
$rk3q1, $res0.1d
, $h7.1d @ GHASH final
-6 block
- low
2208 ldr
$res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext
2209 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2211 pmull
$rk4v.1q
, $rk4v.1d
, $h78k.1d @ GHASH final
-6 block
- mid
2212 st1
{ $res4b}, [$output_ptr], #16 @ AES final-6 block - store result
2213 pmull2
$rk2q1, $res0.2d
, $h7.2d @ GHASH final
-6 block
- high
2215 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-6 block
- low
2216 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-6 block
- high
2218 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-6 block
- mid
2219 eor3
$res4b, $res1b, $ctr2b, $t1.16b @ AES final
-5 block
- result
2220 .L128_dec_blocks_more_than_5
: @ blocks left
> 5
2222 rev64
$res0b, $res1b @ GHASH final
-5 block
2224 ldr
$res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext
2225 st1
{ $res4b}, [$output_ptr], #16 @ AES final-5 block - store result
2227 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2229 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-5 block
- mid
2231 eor3
$res4b, $res1b, $ctr3b, $t1.16b @ AES final
-4 block
- result
2233 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-5 block
- mid
2235 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-5 block
- mid
2236 pmull
$rk3q1, $res0.1d
, $h6.1d @ GHASH final
-5 block
- low
2237 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2239 pmull2
$rk4v.1q
, $rk4v.2d
, $h56k.2d @ GHASH final
-5 block
- mid
2240 pmull2
$rk2q1, $res0.2d
, $h6.2d @ GHASH final
-5 block
- high
2241 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-5 block
- low
2243 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-5 block
- mid
2244 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-5 block
- high
2245 .L128_dec_blocks_more_than_4
: @ blocks left
> 4
2247 rev64
$res0b, $res1b @ GHASH final
-4 block
2249 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2250 ldr
$res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext
2252 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-4 block
- mid
2253 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2254 pmull2
$rk2q1, $res0.2d
, $h5.2d @ GHASH final
-4 block
- high
2256 pmull
$rk3q1, $res0.1d
, $h5.1d @ GHASH final
-4 block
- low
2258 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-4 block
- high
2260 st1
{ $res4b}, [$output_ptr], #16 @ AES final-4 block - store result
2261 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-4 block
- mid
2263 eor3
$res4b, $res1b, $ctr4b, $t1.16b @ AES final
-3 block
- result
2264 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-4 block
- low
2266 pmull
$rk4v.1q
, $rk4v.1d
, $h56k.1d @ GHASH final
-4 block
- mid
2268 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-4 block
- mid
2269 .L128_dec_blocks_more_than_3
: @ blocks left
> 3
2271 st1
{ $res4b}, [$output_ptr], #16 @ AES final-3 block - store result
2272 rev64
$res0b, $res1b @ GHASH final
-3 block
2274 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2276 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-3 block
- mid
2278 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
2279 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
2280 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
2282 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-3 block
- mid
2284 ldr
$res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext
2286 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-3 block
- mid
2287 pmull
$rk3q1, $res0.1d
, $h4.1d @ GHASH final
-3 block
- low
2288 pmull2
$rk2q1, $res0.2d
, $h4.2d @ GHASH final
-3 block
- high
2290 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2291 eor3
$res4b, $res1b, $ctr5b, $t1.16b @ AES final
-2 block
- result
2292 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-3 block
- low
2294 pmull2
$rk4v.1q
, $rk4v.2d
, $h34k.2d @ GHASH final
-3 block
- mid
2296 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-3 block
- high
2297 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-3 block
- mid
2298 .L128_dec_blocks_more_than_2
: @ blocks left
> 2
2300 rev64
$res0b, $res1b @ GHASH final
-2 block
2302 st1
{ $res4b}, [$output_ptr], #16 @ AES final-2 block - store result
2304 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2305 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
2306 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
2307 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2309 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-2 block
- mid
2311 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-2 block
- mid
2313 pmull
$rk3q1, $res0.1d
, $h3.1d @ GHASH final
-2 block
- low
2315 pmull2
$rk2q1, $res0.2d
, $h3.2d @ GHASH final
-2 block
- high
2316 pmull
$rk4v.1q
, $rk4v.1d
, $h34k.1d @ GHASH final
-2 block
- mid
2317 ldr
$res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext
2319 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-2 block
- mid
2321 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-2 block
- low
2323 eor3
$res4b, $res1b, $ctr6b, $t1.16b @ AES final
-1 block
- result
2324 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-2 block
- high
2325 .L128_dec_blocks_more_than_1
: @ blocks left
> 1
2327 st1
{ $res4b}, [$output_ptr], #16 @ AES final-1 block - store result
2328 rev64
$res0b, $res1b @ GHASH final
-1 block
2330 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
2331 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
2333 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2335 movi
$t0.8b
, #0 @ suppress further partial tag feed in
2337 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-1 block
- mid
2339 ldr
$res1q, [$input_ptr], #16 @ AES final block - load ciphertext
2340 pmull2
$rk2q1, $res0.2d
, $h2.2d @ GHASH final
-1 block
- high
2342 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-1 block
- mid
2343 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-1 block
- high
2344 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
2346 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-1 block
- mid
2347 eor3
$res4b, $res1b, $ctr7b, $t1.16b @ AES final block
- result
2349 pmull
$rk3q1, $res0.1d
, $h2.1d @ GHASH final
-1 block
- low
2351 pmull2
$rk4v.1q
, $rk4v.2d
, $h12k.2d @ GHASH final
-1 block
- mid
2353 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-1 block
- low
2355 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-1 block
- mid
2356 .L128_dec_blocks_less_than_1
: @ blocks left
<= 1
2358 and $bit_length, $bit_length, #127 @ bit_length %= 128
2360 sub $bit_length, $bit_length, #128 @ bit_length -= 128
2362 neg
$bit_length, $bit_length @ bit_length
= 128 - #bits in input (in range [1,128])
2364 mvn
$temp0_x, xzr @ temp0_x
= 0xffffffffffffffff
2365 and $bit_length, $bit_length, #127 @ bit_length %= 128
2367 lsr
$temp0_x, $temp0_x, $bit_length @ temp0_x is mask
for top
64b of
last block
2368 cmp $bit_length, #64
2369 mvn
$temp1_x, xzr @ temp1_x
= 0xffffffffffffffff
2371 csel
$temp2_x, $temp1_x, $temp0_x, lt
2372 csel
$temp3_x, $temp0_x, xzr
, lt
2374 mov
$ctr0.d
[1], $temp3_x
2375 mov
$ctr0.d
[0], $temp2_x @ ctr0b is mask
for last block
2377 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
2378 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
2379 ld1
{ $rk0}, [$output_ptr] @ load existing bytes where the possibly partial
last block is to be stored
2381 and $res1b, $res1b, $ctr0b @ possibly partial
last block has zeroes
in highest bits
2383 rev64
$res0b, $res1b @ GHASH final block
2385 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
2387 pmull2
$rk2q1, $res0.2d
, $h1.2d @ GHASH final block
- high
2388 ins
$t0.d
[0], $res0.d
[1] @ GHASH final block
- mid
2390 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final block
- high
2391 eor
$t0.8b
, $t0.8b
, $res0.8b @ GHASH final block
- mid
2393 bif
$res4b, $rk0, $ctr0b @ insert existing bytes
in top end of result before storing
2395 pmull
$t0.1q
, $t0.1d
, $h12k.1d @ GHASH final block
- mid
2396 st1
{ $res4b}, [$output_ptr] @ store all
16B
2398 pmull
$rk3q1, $res0.1d
, $h1.1d @ GHASH final block
- low
2400 eor
$acc_mb, $acc_mb, $t0.16b @ GHASH final block
- mid
2401 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
2403 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final block
- low
2405 eor
$t10.16b
, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
2407 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
2408 ext
$acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
2410 eor
$acc_mb, $acc_mb, $t10.16b @ MODULO
- karatsuba tidy up
2412 eor3
$acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO
- fold into mid
2414 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
2415 ext
$acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
2417 eor3
$acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO
- fold into low
2418 ext
$acc_lb, $acc_lb, $acc_lb, #8
2419 rev64
$acc_lb, $acc_lb
2420 st1
{ $acc_l.16b
}, [$current_tag]
2421 rev32
$rtmp_ctr.16b
, $rtmp_ctr.16b
2423 str
$rtmp_ctrq, [$counter] @ store the updated counter
2425 lsr x0
, $bit_length, #3
2427 ldp d10
, d11
, [sp
, #16]
2428 ldp d12
, d13
, [sp
, #32]
2429 ldp d14
, d15
, [sp
, #48]
2430 ldp d8
, d9
, [sp
], #80
2435 .size unroll8_eor3_aes_gcm_dec_128_kernel
,.-unroll8_eor3_aes_gcm_dec_128_kernel
# Symbolic register names interpolated into the assembly template that
# follows.  Each list assignment binds a run of names to consecutive AArch64
# register names produced by map(); several lists intentionally reuse the
# same register numbers under different names.
#
# General-purpose registers: x4..x7 for the input-end pointers and two
# temporaries, x13..x14 for two more temporaries.
2440 my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
2441 my ($temp2_x,$temp3_x)=map("x$_",(13..14));
# Vector registers 0..7 are the counter blocks ($ctrN*), 8..15 the result
# blocks ($resN*).  The "...b" names already carry the ".16b" arrangement
# suffix; the bare "v" names omit it so another arrangement can be appended
# where the name is used (e.g. "$res0.1d").
2442 my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
2443 my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
# "d" and "q" spellings of the same register numbers, for instructions that
# take a scalar SIMD operand (ldr/ldp/stp).
2444 my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
2445 my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
2446 my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));
# $ctr_tN* are additional names for registers 8..15, i.e. the same registers
# as $resN* above.
2448 my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
2449 my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
2450 my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));
# GHASH accumulators: v17 (high), v18 (mid), v19 (low).
2452 my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
2453 my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));
# Hash-key names.  The h1..h4 group and the h5..h8 group are both bound to
# v20..v25, so a name from one group refers to the same register as the
# name in the same position of the other group (e.g. $h1 and $h5 are both
# v20).  The assembly loads the key it needs from memory before using it.
2455 my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
2456 my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25));
2457 my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25));
2458 my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25));
# q30 / d31: scalar spellings used for the byte-reversed counter and the
# counter increment.
2480 my $rtmp_ctrq="q30";
2482 my $rctr_incd="d31";
# The modulo constant shares whatever register $t0 names.
# NOTE(review): $t0 and $t0d are not declared in the statements visible
# here; confirm they are declared earlier in the enclosing scope.
2484 my $mod_constantd=$t0d;
2485 my $mod_constant=$t0;
# Round-key names.  All fifteen names rk0..rk14 are bound to just three
# registers in rotation: rk0/rk3/rk6/rk9/rk12 -> v26, rk1/rk4/rk7/rk10/rk13
# -> v27, rk2/rk5/rk8/rk11/rk14 -> v28.  Loading a later key therefore
# overwrites the register of the key three positions earlier.
2487 my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28));
2488 my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28));
2489 my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28));
2490 my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28));
2491 my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28));
# "q" spellings of the same three round-key registers, for ldr/ldp.
2492 my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28));
2493 my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28));
2494 my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28));
2495 my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28));
2496 my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28));
2501 #########################################################################################
2502 # size_t unroll8_eor3_aes_gcm_enc_192_kernel(const unsigned char *in,
2504 # unsigned char *out,
2506 # unsigned char ivec[16],
2510 .global unroll8_eor3_aes_gcm_enc_192_kernel
2511 .type unroll8_eor3_aes_gcm_enc_192_kernel
,%function
2513 unroll8_eor3_aes_gcm_enc_192_kernel
:
2514 AARCH64_VALID_CALL_TARGET
2515 cbz x1
, .L192_enc_ret
2516 stp d8
, d9
, [sp
, #-80]!
2519 stp d10
, d11
, [sp
, #16]
2520 stp d12
, d13
, [sp
, #32]
2521 stp d14
, d15
, [sp
, #48]
2522 mov x5
, #0xc200000000000000
2523 stp x5
, xzr
, [sp
, #64]
2524 add
$modulo_constant, sp
, #64
2526 lsr
$main_end_input_ptr, $bit_length, #3 @ byte_len
2527 ld1
{ $ctr0b}, [$counter] @ CTR block
0
2529 mov
$constant_temp, #0x100000000 @ set up counter increment
2530 movi
$rctr_inc.16b
, #0x0
2531 mov
$rctr_inc.d
[1], $constant_temp
2533 rev32
$rtmp_ctr.16b
, $ctr0.16b @ set up reversed counter
2535 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
0
2537 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
1
2538 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
1
2540 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
2
2541 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
2
2543 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
3
2544 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
3
2546 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
4
2547 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
4
2548 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
2550 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
2552 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
5
2553 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
5
2554 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
2556 add
$main_end_input_ptr, $main_end_input_ptr, $input_ptr
2558 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
6
2559 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
6
2561 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
7
2563 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
0
2564 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
0
2565 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
0
2567 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
0
2568 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
0
2569 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
0
2571 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
0
2572 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
0
2573 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
2575 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
1
2576 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
1
2578 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
1
2579 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
1
2580 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
1
2582 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
2
2583 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
1
2584 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
1
2586 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
1
2587 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
2
2588 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
2
2590 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
2
2591 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
2
2593 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
2
2594 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
2
2595 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
2
2597 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
2598 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
3
2600 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
3
2601 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
3
2602 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
3
2604 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
3
2606 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
3
2608 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
3
2610 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
4
2611 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
4
2612 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
3
2614 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
4
2615 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
4
2616 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
4
2618 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
4
2619 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
4
2620 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
4
2622 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
5
2623 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
2624 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
5
2626 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
5
2627 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
5
2628 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
5
2630 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
5
2631 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
5
2632 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
5
2634 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
7
2636 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
6
2637 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
6
2638 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
6
2640 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
6
2641 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
6
2642 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
6
2644 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
6
2645 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
6
2646 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
2648 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
7
2649 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
7
2651 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
7
2652 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
7
2654 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
7
2655 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
7
2657 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
7
2658 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
7
2660 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
8
2661 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
8
2663 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
8
2664 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
8
2665 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
8
2667 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
8
2668 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
8
2669 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
8
2671 add
$end_input_ptr, $input_ptr, $bit_length, lsr
#3 @ end_input_ptr
2672 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
2673 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
9
2675 ld1
{ $acc_lb}, [$current_tag]
2676 ext
$acc_lb, $acc_lb, $acc_lb, #8
2677 rev64
$acc_lb, $acc_lb
2678 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
2680 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
9
2681 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
9
2683 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
9
2684 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
9
2686 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
9
2687 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
9
2689 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
14 - round
10
2690 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
9
2691 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
11 - round
10
2693 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
9 - round
10
2694 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
13 - round
10
2695 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
12 - round
10
2697 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8 - round
10
2698 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
10 - round
10
2699 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
15 - round
10
2701 aese
$ctr6b, $rk11 @ AES block
14 - round
11
2702 aese
$ctr3b, $rk11 @ AES block
11 - round
11
2704 aese
$ctr4b, $rk11 @ AES block
12 - round
11
2705 aese
$ctr7b, $rk11 @ AES block
15 - round
11
2706 ldr
$rk12q, [$cc, #192] @ load rk12
2708 aese
$ctr1b, $rk11 @ AES block
9 - round
11
2709 aese
$ctr5b, $rk11 @ AES block
13 - round
11
2711 aese
$ctr2b, $rk11 @ AES block
10 - round
11
2712 aese
$ctr0b, $rk11 @ AES block
8 - round
11
2713 b
.ge .L192_enc_tail @ handle tail
2715 ldp
$ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext
2717 ldp
$ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext
2719 ldp
$ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
2721 ldp
$ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
2723 eor3
$res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block
0 - result
2724 rev32
$ctr0.16b
, $rtmp_ctr.16b @ CTR block
8
2725 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8
2727 eor3
$res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block
3 - result
2728 eor3
$res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block
1 - result
2730 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
9
2731 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
9
2732 eor3
$res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block
4 - result
2734 eor3
$res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block
5 - result
2735 eor3
$res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block
7 - result
2736 stp
$res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result
2738 eor3
$res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block
2 - result
2739 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
10
2740 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
10
2742 stp
$res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result
2743 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
2745 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
11
2746 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
11
2747 eor3
$res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block
6 - result
2749 stp
$res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
2751 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
12
2752 stp
$res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
2753 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
12
2755 b
.ge .L192_enc_prepretail @
do prepretail
2757 .L192_enc_main_loop
: @ main
loop start
2758 rev64
$res4b, $res4b @ GHASH block
8k
+4 (t0
, t1
, and t2 free
)
2759 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
2760 rev64
$res2b, $res2b @ GHASH block
8k
+2
2762 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
2763 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
2764 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
2765 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
2766 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
2767 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
2769 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
2770 rev64
$res0b, $res0b @ GHASH block
8k
2771 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
2772 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
2773 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
2774 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
2776 rev64
$res1b, $res1b @ GHASH block
8k
+1
2777 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
2778 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
2780 eor
$res0b, $res0b, $acc_lb @ PRE
1
2781 rev64
$res3b, $res3b @ GHASH block
8k
+3
2782 rev64
$res5b, $res5b @ GHASH block
8k
+5 (t0
, t1
, t2
and t3 free
)
2784 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
2785 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
2786 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
2788 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
2789 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
2790 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
2792 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
2793 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
2794 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
2796 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
2797 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
2798 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
2800 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
2801 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
2802 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
2804 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
2805 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
2806 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
2807 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
2809 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
2810 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
2811 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
2813 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
2814 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
2815 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
2817 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
2818 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
2819 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
2821 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
2822 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
2823 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
2825 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
2826 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
2827 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
2829 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
2830 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
2831 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
2833 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
2834 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
2835 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
2837 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
2838 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
2839 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
2841 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
2842 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
2843 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
2845 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
2846 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
2847 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
2848 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
2849 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
2850 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
2852 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
2853 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
2854 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
2856 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
2857 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
2858 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
2860 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
2861 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
2862 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
2864 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
2865 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
2866 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
2868 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
2869 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
2870 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
2872 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
2873 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
2874 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
2876 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
2877 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
2878 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
2880 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
2881 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
2882 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
2883 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
2884 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
2885 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
2887 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
2888 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
2889 rev64
$res7b, $res7b @ GHASH block
8k
+7 (t0
, t1
, t2
and t3 free
)
2891 rev64
$res6b, $res6b @ GHASH block
8k
+6 (t0
, t1
, and t2 free
)
2892 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
2893 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
2895 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
2896 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
2898 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
2899 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
2900 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
2902 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
2903 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
2904 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
2906 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
2907 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
2908 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
2910 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
2911 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
2912 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
2914 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
2915 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
2916 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
2918 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
2919 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
2920 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
2922 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
2923 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
2925 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
2926 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
2927 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
2929 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
2930 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
2932 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
2933 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
2934 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
2936 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
2937 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
2938 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
2940 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
2941 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
2942 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
2944 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
2945 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
2946 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
2948 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
2949 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
2950 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
2952 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
2953 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
2954 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
2956 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
2957 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
2958 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
2960 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
2961 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
2962 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
2964 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
2965 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
2966 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
2968 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
2969 rev32
$h1.16b
, $rtmp_ctr.16b @ CTR block
8k
+16
2970 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+16
2972 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
2973 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
2974 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
2976 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
2977 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
2978 ldp
$ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext
2980 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
2981 rev32
$h2.16b
, $rtmp_ctr.16b @ CTR block
8k
+17
2982 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
2984 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
2985 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
2986 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
2988 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
2989 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
2990 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+17
2992 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
2993 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
2994 ldr
$rk12q, [$cc, #192] @ load rk12
2995 ext
$t12.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
2997 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
2998 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
2999 ldp
$ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext
3001 aese
$ctr4b, $rk11 @ AES block
8k
+12 - round
11
3002 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
3003 ldp
$ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext
3005 ldp
$ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext
3006 aese
$ctr2b, $rk11 @ AES block
8k
+10 - round
11
3007 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
3009 rev32
$h3.16b
, $rtmp_ctr.16b @ CTR block
8k
+18
3010 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
3012 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
3013 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
3015 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
3016 aese
$ctr5b, $rk11 @ AES block
8k
+13 - round
11
3017 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+18
3019 aese
$ctr7b, $rk11 @ AES block
8k
+15 - round
11
3020 aese
$ctr0b, $rk11 @ AES block
8k
+8 - round
11
3021 eor3
$res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block
4 - result
3023 aese
$ctr6b, $rk11 @ AES block
8k
+14 - round
11
3024 aese
$ctr3b, $rk11 @ AES block
8k
+11 - round
11
3025 aese
$ctr1b, $rk11 @ AES block
8k
+9 - round
11
3027 rev32
$h4.16b
, $rtmp_ctr.16b @ CTR block
8k
+19
3028 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+19
3029 eor3
$res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block
7 - result
3031 eor3
$res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block
8k
+10 - result
3032 eor3
$res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block
8k
+8 - result
3033 mov
$ctr2.16b
, $h3.16b @ CTR block
8k
+18
3035 eor3
$res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block
8k
+9 - result
3036 mov
$ctr1.16b
, $h2.16b @ CTR block
8k
+17
3037 stp
$res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
3038 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
3040 eor3
$res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block
6 - result
3041 mov
$ctr0.16b
, $h1.16b @ CTR block
8k
+16
3042 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
8k
+20
3044 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+20
3045 eor3
$res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block
5 - result
3046 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
3048 eor3
$res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block
8k
+11 - result
3049 mov
$ctr3.16b
, $h4.16b @ CTR block
8k
+19
3051 stp
$res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
3053 stp
$res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
3055 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
3056 stp
$res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
3057 b
.lt .L192_enc_main_loop
3059 .L192_enc_prepretail
: @ PREPRETAIL
3060 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
3061 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
3062 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
3064 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
3065 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
3066 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
3067 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
3068 rev64
$res0b, $res0b @ GHASH block
8k
3069 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
3071 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
3072 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
3073 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
3074 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
3076 rev64
$res3b, $res3b @ GHASH block
8k
+3
3077 rev64
$res2b, $res2b @ GHASH block
8k
+2
3078 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
3079 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
3080 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
3081 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
3083 eor
$res0b, $res0b, $acc_lb @ PRE
1
3084 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
3085 rev64
$res1b, $res1b @ GHASH block
8k
+1
3087 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
3088 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
3089 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
3091 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
3092 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
3093 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
3095 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
3096 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
3097 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
3099 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
3100 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
3101 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
3103 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
3104 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
3105 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
3107 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
3108 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
3109 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
3111 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
3112 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
3113 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
3115 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
3116 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
3117 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
3119 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
3120 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
3121 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
3123 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
3124 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
3125 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
3127 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
3128 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
3129 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
3131 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
3132 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
3133 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
3135 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
3136 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
3137 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
3139 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
3140 rev64
$res5b, $res5b @ GHASH block
8k
+5 (t0
, t1
, t2
and t3 free
)
3141 rev64
$res6b, $res6b @ GHASH block
8k
+6 (t0
, t1
, and t2 free
)
3143 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
3144 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
3145 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
3147 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
3148 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
3149 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
3151 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
3152 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
3153 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
3155 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
3156 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
3157 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
3159 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
3160 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
3161 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
3162 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
3163 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
3164 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
3166 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
3167 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
3168 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
3169 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
3170 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
3171 rev64
$res4b, $res4b @ GHASH block
8k
+4 (t0
, t1
, and t2 free
)
3173 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
3174 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
3175 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
3177 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
3178 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
3179 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
3181 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
3182 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
3183 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
3185 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
3186 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
3187 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
3189 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
3190 rev64
$res7b, $res7b @ GHASH block
8k
+7 (t0
, t1
, t2
and t3 free
)
3191 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
3192 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
3194 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
3195 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
3196 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
3198 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
3199 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
3200 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
3202 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
3203 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
3205 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
3206 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
3207 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
3209 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
3210 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
3211 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
3213 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
3214 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
3215 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
3217 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
3218 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
3219 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
3221 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
3222 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
3224 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
3225 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
3226 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
3228 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
3229 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
3230 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
3232 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
3233 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
3234 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
3236 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
3237 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
3238 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
3240 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
3241 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
3242 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
3244 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
3245 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
3246 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
3248 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
3249 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
3250 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
3252 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
3253 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
3255 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
3256 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
3258 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
3259 ext
$t12.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
3260 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
3261 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
3263 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
3264 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
3266 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
3267 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
3268 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
3270 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
3271 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
3272 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
3274 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
3275 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
3276 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
3278 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
3279 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
3280 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
3282 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
3283 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
3285 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
3286 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
3287 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
3288 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
3290 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
3291 ldr
$rk12q, [$cc, #192] @ load rk12
3293 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
3294 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
3295 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
3297 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
3298 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
3299 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
3301 aese
$ctr1b, $rk11 @ AES block
8k
+9 - round
11
3302 aese
$ctr7b, $rk11 @ AES block
8k
+15 - round
11
3304 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
3305 aese
$ctr3b, $rk11 @ AES block
8k
+11 - round
11
3307 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
3308 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
3310 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
3311 aese
$ctr2b, $rk11 @ AES block
8k
+10 - round
11
3312 aese
$ctr0b, $rk11 @ AES block
8k
+8 - round
11
3314 aese
$ctr6b, $rk11 @ AES block
8k
+14 - round
11
3315 aese
$ctr4b, $rk11 @ AES block
8k
+12 - round
11
3316 aese
$ctr5b, $rk11 @ AES block
8k
+13 - round
11
3318 .L192_enc_tail
: @ TAIL
3320 ldp
$h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h
3321 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
3322 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
3324 ldr
$ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext
3326 ldp
$h78kq, $h8q, [$current_tag, #192] @ load h8l | h8h
3327 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
3331 ldp
$h6q, $h7q, [$current_tag, #160] @ load h6l | h6h
3332 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
3333 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
3334 cmp $main_end_input_ptr, #112
3336 eor3
$res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block
8k
+8 - result
3337 ext
$t0.16b
, $acc_lb, $acc_lb, #8 @ prepare final partial tag
3338 b
.gt .L192_enc_blocks_more_than_7
3340 cmp $main_end_input_ptr, #96
3346 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3354 b
.gt .L192_enc_blocks_more_than_6
3357 cmp $main_end_input_ptr, #80
3364 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3365 b
.gt .L192_enc_blocks_more_than_5
3367 cmp $main_end_input_ptr, #64
3368 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3375 b
.gt .L192_enc_blocks_more_than_4
3381 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3382 cmp $main_end_input_ptr, #48
3383 b
.gt .L192_enc_blocks_more_than_3
3387 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3389 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
3390 cmp $main_end_input_ptr, #32
3391 b
.gt .L192_enc_blocks_more_than_2
3393 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3395 cmp $main_end_input_ptr, #16
3397 b
.gt .L192_enc_blocks_more_than_1
3399 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
3400 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
3401 b
.L192_enc_blocks_less_than_1
3402 .L192_enc_blocks_more_than_7
: @ blocks left
> 7
3403 st1
{ $res1b}, [$output_ptr], #16 @ AES final-7 block - store result
3405 rev64
$res0b, $res1b @ GHASH final
-7 block
3406 ins
$acc_m.d
[0], $h78k.d
[1] @ GHASH final
-7 block
- mid
3408 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3410 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-7 block
- mid
3412 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext
3414 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-7 block
- mid
3415 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3416 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH final
-7 block
- low
3418 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH final
-7 block
- high
3420 pmull
$acc_m.1q
, $rk4v.1d
, $acc_m.1d @ GHASH final
-7 block
- mid
3421 eor3
$res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final
-6 block
- result
3422 .L192_enc_blocks_more_than_6
: @ blocks left
> 6
3424 st1
{ $res1b}, [$output_ptr], #16 @ AES final-6 block - store result
3426 rev64
$res0b, $res1b @ GHASH final
-6 block
3428 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext
3430 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3432 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-6 block
- mid
3434 pmull
$rk3q1, $res0.1d
, $h7.1d @ GHASH final
-6 block
- low
3435 eor3
$res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final
-5 block
- result
3437 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3438 pmull2
$rk2q1, $res0.2d
, $h7.2d @ GHASH final
-6 block
- high
3439 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-6 block
- mid
3441 pmull
$rk4v.1q
, $rk4v.1d
, $h78k.1d @ GHASH final
-6 block
- mid
3443 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-6 block
- high
3444 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-6 block
- low
3446 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-6 block
- mid
3447 .L192_enc_blocks_more_than_5
: @ blocks left
> 5
3449 st1
{ $res1b}, [$output_ptr], #16 @ AES final-5 block - store result
3451 rev64
$res0b, $res1b @ GHASH final
-5 block
3453 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3455 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-5 block
- mid
3457 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext
3458 pmull2
$rk2q1, $res0.2d
, $h6.2d @ GHASH final
-5 block
- high
3460 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-5 block
- mid
3461 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-5 block
- high
3463 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-5 block
- mid
3464 pmull
$rk3q1, $res0.1d
, $h6.1d @ GHASH final
-5 block
- low
3466 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-5 block
- low
3467 pmull2
$rk4v.1q
, $rk4v.2d
, $h56k.2d @ GHASH final
-5 block
- mid
3469 eor3
$res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final
-4 block
- result
3470 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3472 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-5 block
- mid
3473 .L192_enc_blocks_more_than_4
: @ blocks left
> 4
3475 st1
{ $res1b}, [$output_ptr], #16 @ AES final-4 block - store result
3477 rev64
$res0b, $res1b @ GHASH final
-4 block
3479 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3481 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext
3482 pmull2
$rk2q1, $res0.2d
, $h5.2d @ GHASH final
-4 block
- high
3483 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-4 block
- mid
3485 pmull
$rk3q1, $res0.1d
, $h5.1d @ GHASH final
-4 block
- low
3486 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-4 block
- high
3488 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-4 block
- mid
3490 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3491 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-4 block
- low
3493 pmull
$rk4v.1q
, $rk4v.1d
, $h56k.1d @ GHASH final
-4 block
- mid
3495 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-4 block
- mid
3496 eor3
$res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final
-3 block
- result
3497 .L192_enc_blocks_more_than_3
: @ blocks left
> 3
3499 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
3500 st1
{ $res1b}, [$output_ptr], #16 @ AES final-3 block - store result
3502 rev64
$res0b, $res1b @ GHASH final
-3 block
3504 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3505 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3507 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext
3508 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
3509 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
3511 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-3 block
- mid
3513 eor3
$res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final
-2 block
- result
3514 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-3 block
- mid
3516 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-3 block
- mid
3517 pmull
$rk3q1, $res0.1d
, $h4.1d @ GHASH final
-3 block
- low
3519 pmull2
$rk2q1, $res0.2d
, $h4.2d @ GHASH final
-3 block
- high
3520 pmull2
$rk4v.1q
, $rk4v.2d
, $h34k.2d @ GHASH final
-3 block
- mid
3522 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-3 block
- low
3524 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-3 block
- mid
3525 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-3 block
- high
3526 .L192_enc_blocks_more_than_2
: @ blocks left
> 2
3528 st1
{ $res1b}, [$output_ptr], #16 @ AES final-2 block - store result
3530 rev64
$res0b, $res1b @ GHASH final
-2 block
3531 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
3532 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
3534 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3536 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext
3537 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-2 block
- mid
3539 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-2 block
- mid
3541 pmull
$rk3q1, $res0.1d
, $h3.1d @ GHASH final
-2 block
- low
3542 pmull2
$rk2q1, $res0.2d
, $h3.2d @ GHASH final
-2 block
- high
3543 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3545 pmull
$rk4v.1q
, $rk4v.1d
, $h34k.1d @ GHASH final
-2 block
- mid
3547 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-2 block
- low
3548 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-2 block
- high
3550 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-2 block
- mid
3551 eor3
$res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final
-1 block
- result
3552 .L192_enc_blocks_more_than_1
: @ blocks left
> 1
3554 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
3555 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
3556 st1
{ $res1b}, [$output_ptr], #16 @ AES final-1 block - store result
3558 rev64
$res0b, $res1b @ GHASH final
-1 block
3560 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3562 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-1 block
- mid
3563 pmull
$rk3q1, $res0.1d
, $h2.1d @ GHASH final
-1 block
- low
3565 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-1 block
- low
3566 pmull2
$rk2q1, $res0.2d
, $h2.2d @ GHASH final
-1 block
- high
3567 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-1 block
- mid
3569 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext
3570 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
3572 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-1 block
- mid
3574 eor3
$res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block
- result
3575 pmull2
$rk4v.1q
, $rk4v.2d
, $h12k.2d @ GHASH final
-1 block
- mid
3577 movi
$t0.8b
, #0 @ suppress further partial tag feed in
3579 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-1 block
- mid
3580 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-1 block
- high
3581 .L192_enc_blocks_less_than_1
: @ blocks left
<= 1
3583 mvn
$temp0_x, xzr @ temp0_x
= 0xffffffffffffffff
3584 and $bit_length, $bit_length, #127 @ bit_length %= 128
3586 sub $bit_length, $bit_length, #128 @ bit_length -= 128
3588 neg
$bit_length, $bit_length @ bit_length
= 128 - #bits in input (in range [1,128])
3590 and $bit_length, $bit_length, #127 @ bit_length %= 128
3592 lsr
$temp0_x, $temp0_x, $bit_length @ temp0_x is mask
for top
64b of
last block
3593 cmp $bit_length, #64
3594 mvn
$temp1_x, xzr @ temp1_x
= 0xffffffffffffffff
3596 csel
$temp2_x, $temp1_x, $temp0_x, lt
3597 csel
$temp3_x, $temp0_x, xzr
, lt
3599 mov
$ctr0.d
[1], $temp3_x
3600 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
3601 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
3603 ld1
{ $rk0}, [$output_ptr] @ load existing bytes where the possibly partial
last block is to be stored
3604 mov
$ctr0.d
[0], $temp2_x @ ctr0b is mask
for last block
3606 and $res1b, $res1b, $ctr0b @ possibly partial
last block has zeroes
in highest bits
3608 rev64
$res0b, $res1b @ GHASH final block
3609 bif
$res1b, $rk0, $ctr0b @ insert existing bytes
in top end of result before storing
3611 st1
{ $res1b}, [$output_ptr] @ store all
16B
3613 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
3615 ins
$t0.d
[0], $res0.d
[1] @ GHASH final block
- mid
3616 pmull2
$rk2q1, $res0.2d
, $h1.2d @ GHASH final block
- high
3618 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final block
- high
3619 pmull
$rk3q1, $res0.1d
, $h1.1d @ GHASH final block
- low
3621 eor
$t0.8b
, $t0.8b
, $res0.8b @ GHASH final block
- mid
3623 pmull
$t0.1q
, $t0.1d
, $h12k.1d @ GHASH final block
- mid
3625 eor
$acc_mb, $acc_mb, $t0.16b @ GHASH final block
- mid
3626 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
3628 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final block
- low
3629 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
3631 rev32
$rtmp_ctr.16b
, $rtmp_ctr.16b
3633 str
$rtmp_ctrq, [$counter] @ store the updated counter
3634 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
3636 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
3638 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
3640 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
3641 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
3643 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
3644 ext
$acc_lb, $acc_lb, $acc_lb, #8
3645 rev64
$acc_lb, $acc_lb
3646 st1
{ $acc_l.16b
}, [$current_tag]
3648 lsr x0
, $bit_length, #3 @ return sizes
3650 ldp d10
, d11
, [sp
, #16]
3651 ldp d12
, d13
, [sp
, #32]
3652 ldp d14
, d15
, [sp
, #48]
3653 ldp d8
, d9
, [sp
], #80
3659 .size unroll8_eor3_aes_gcm_enc_192_kernel
,.-unroll8_eor3_aes_gcm_enc_192_kernel
3662 #########################################################################################
3663 # size_t unroll8_eor3_aes_gcm_dec_192_kernel(const unsigned char *in,
3665 # unsigned char *out,
3667 # unsigned char ivec[16],
3671 .global unroll8_eor3_aes_gcm_dec_192_kernel
3672 .type unroll8_eor3_aes_gcm_dec_192_kernel
,%function
3674 unroll8_eor3_aes_gcm_dec_192_kernel
:
3675 AARCH64_VALID_CALL_TARGET
3676 cbz x1
, .L192_dec_ret
3677 stp d8
, d9
, [sp
, #-80]!
3680 stp d10
, d11
, [sp
, #16]
3681 stp d12
, d13
, [sp
, #32]
3682 stp d14
, d15
, [sp
, #48]
3683 mov x5
, #0xc200000000000000
3684 stp x5
, xzr
, [sp
, #64]
3685 add
$modulo_constant, sp
, #64
3687 lsr
$main_end_input_ptr, $bit_length, #3 @ byte_len
3688 ld1
{ $ctr0b}, [$counter] @ CTR block
0
3689 ld1
{ $acc_lb}, [$current_tag]
3691 mov
$constant_temp, #0x100000000 @ set up counter increment
3692 movi
$rctr_inc.16b
, #0x0
3693 mov
$rctr_inc.d
[1], $constant_temp
3695 rev32
$rtmp_ctr.16b
, $ctr0.16b @ set up reversed counter
3697 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
0
3699 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
1
3700 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
1
3702 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
2
3703 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
2
3705 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
3
3706 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
3
3708 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
4
3709 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
4
3711 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
5
3712 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
5
3713 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
3715 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
6
3716 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
6
3718 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
7
3720 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
0
3721 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
0
3722 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
0
3724 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
0
3725 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
0
3726 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
0
3728 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
0
3729 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
0
3730 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
3732 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
1
3734 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
1
3736 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
1
3737 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
1
3738 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
1
3740 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
1
3741 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
1
3743 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
2
3744 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
2
3745 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
1
3747 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
2
3748 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
2
3749 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
2
3751 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
2
3752 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
2
3753 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
2
3755 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
3
3757 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
3758 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
3
3759 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
3
3761 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
3
3762 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
3
3764 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
3
3765 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
3
3766 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
3
3768 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
4
3769 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
4
3770 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
4
3772 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
4
3773 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
4
3774 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
4
3776 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
4
3777 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
5
3778 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
4
3780 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
5
3781 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
3783 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
5
3784 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
5
3785 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
5
3787 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
5
3788 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
5
3789 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
5
3791 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
3793 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
6
3794 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
6
3795 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
6
3797 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
6
3798 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
6
3799 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
6
3801 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
6
3802 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
6
3803 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
3805 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
7
3807 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
7
3808 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
7
3810 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
7
3811 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
7
3812 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
7
3814 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
7
3815 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
7
3816 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
7
3818 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
8
3819 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
8
3820 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
3822 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
8
3823 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
8
3824 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
8
3826 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
8
3827 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
8
3828 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
8
3830 add
$end_input_ptr, $input_ptr, $bit_length, lsr
#3 @ end_input_ptr
3831 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
9
3833 ld1
{ $acc_lb}, [$current_tag]
3834 ext
$acc_lb, $acc_lb, $acc_lb, #8
3835 rev64
$acc_lb, $acc_lb
3837 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
3839 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
9
3840 add
$main_end_input_ptr, $main_end_input_ptr, $input_ptr
3842 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
9
3843 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
9
3844 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
9
3846 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
3847 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
9
3849 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
9
3850 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
9
3852 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
10
3853 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
10
3854 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
10
3856 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
10
3857 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
10
3858 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
10
3860 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
10
3861 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
10
3862 ldr
$rk12q, [$cc, #192] @ load rk12
3864 aese
$ctr0b, $rk11 @ AES block
0 - round
11
3865 aese
$ctr1b, $rk11 @ AES block
1 - round
11
3866 aese
$ctr4b, $rk11 @ AES block
4 - round
11
3868 aese
$ctr6b, $rk11 @ AES block
6 - round
11
3869 aese
$ctr5b, $rk11 @ AES block
5 - round
11
3870 aese
$ctr7b, $rk11 @ AES block
7 - round
11
3872 aese
$ctr2b, $rk11 @ AES block
2 - round
11
3873 aese
$ctr3b, $rk11 @ AES block
3 - round
11
3874 b
.ge .L192_dec_tail @ handle tail
3876 ldp
$res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext
3878 ldp
$res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext
3880 ldp
$res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext
3882 eor3
$ctr1b, $res1b, $ctr1b, $rk12 @ AES block
1 - result
3883 eor3
$ctr0b, $res0b, $ctr0b, $rk12 @ AES block
0 - result
3884 stp
$ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result
3886 rev32
$ctr0.16b
, $rtmp_ctr.16b @ CTR block
8
3887 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8
3889 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
9
3890 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
9
3891 eor3
$ctr3b, $res3b, $ctr3b, $rk12 @ AES block
3 - result
3893 eor3
$ctr2b, $res2b, $ctr2b, $rk12 @ AES block
2 - result
3894 stp
$ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result
3895 ldp
$res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext
3897 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
10
3898 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
10
3900 eor3
$ctr4b, $res4b, $ctr4b, $rk12 @ AES block
4 - result
3902 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
11
3903 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
11
3905 eor3
$ctr5b, $res5b, $ctr5b, $rk12 @ AES block
5 - result
3906 stp
$ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result
3907 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
3909 eor3
$ctr6b, $res6b, $ctr6b, $rk12 @ AES block
6 - result
3910 eor3
$ctr7b, $res7b, $ctr7b, $rk12 @ AES block
7 - result
3911 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
12
3913 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
12
3914 stp
$ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result
3915 b
.ge .L192_dec_prepretail @
do prepretail
3917 .L192_dec_main_loop
: @ main
loop start
3918 rev64
$res1b, $res1b @ GHASH block
8k
+1
3919 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
3920 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
3922 rev64
$res0b, $res0b @ GHASH block
8k
3923 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
3924 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
3926 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
3927 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
3928 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
3929 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
3930 rev64
$res4b, $res4b @ GHASH block
8k
+4
3931 rev64
$res3b, $res3b @ GHASH block
8k
+3
3933 eor
$res0b, $res0b, $acc_lb @ PRE
1
3934 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
3935 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
3937 rev64
$res5b, $res5b @ GHASH block
8k
+5
3939 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
3940 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
3941 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
3943 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
3944 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
3945 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
3947 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
3948 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
3949 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
3951 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
3952 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
3953 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
3955 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
3956 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
3957 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
3958 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
3959 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
3960 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
3962 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
3963 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
3964 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
3966 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
3967 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
3968 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
3970 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
3971 rev64
$res2b, $res2b @ GHASH block
8k
+2
3972 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
3974 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
3975 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
3976 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
3977 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
3979 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
3980 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
3981 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
3983 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
3984 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
3985 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
3987 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
3988 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
3989 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
3991 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
3992 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
3993 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
3995 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
3996 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
3997 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
3999 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
4000 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
4001 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
4002 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
4003 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
4004 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
4006 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
4007 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
4008 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
4010 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
4011 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
4013 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
4014 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
4015 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
4017 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
4018 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
4019 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
4021 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
4022 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
4024 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
4025 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
4026 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
4028 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
4029 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
4030 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
4032 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
4033 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
4034 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
4036 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
4037 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
4038 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
4040 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
4041 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
4042 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
4044 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
4045 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
4046 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
4047 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
4048 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
4049 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
4051 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
4052 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
4053 rev64
$res7b, $res7b @ GHASH block
8k
+7
4055 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
4056 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
4057 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
4059 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
4060 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
4061 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
4063 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
4064 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
4065 rev64
$res6b, $res6b @ GHASH block
8k
+6
4067 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
4068 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
4069 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
4070 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
4072 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
4073 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
4074 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
4076 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
4077 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
4078 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
4080 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
4081 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
4082 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
4084 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
4085 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
4086 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
4088 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
4089 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
4090 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
4092 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
4093 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
4094 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
4096 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
4097 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
4098 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
4100 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
4101 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
4102 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
4104 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
4105 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
4106 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
4108 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
4109 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
4110 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
4112 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
4113 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
4114 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
4116 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
4117 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
4118 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
4120 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
4121 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
4122 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
4124 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
4125 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
4126 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
4128 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
4129 rev32
$h1.16b
, $rtmp_ctr.16b @ CTR block
8k
+16
4130 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+16
4132 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
4133 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
4134 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
4136 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
4137 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
4138 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
4140 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
4141 ldp
$res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext
4143 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
4144 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
4145 ldp
$res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext
4147 rev32
$h2.16b
, $rtmp_ctr.16b @ CTR block
8k
+17
4148 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
4149 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+17
4151 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
4152 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
4153 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
4155 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
4156 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
4157 ldp
$res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext
4159 rev32
$h3.16b
, $rtmp_ctr.16b @ CTR block
8k
+18
4160 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+18
4161 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
4163 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
4164 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
4165 ldr
$rk12q, [$cc, #192] @ load rk12
4167 ldp
$res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext
4168 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
4169 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
4171 aese
$ctr0b, $rk11 @ AES block
8k
+8 - round
11
4172 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
4173 aese
$ctr1b, $rk11 @ AES block
8k
+9 - round
11
4175 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
4176 aese
$ctr6b, $rk11 @ AES block
8k
+14 - round
11
4177 aese
$ctr3b, $rk11 @ AES block
8k
+11 - round
11
4179 eor3
$ctr0b, $res0b, $ctr0b, $rk12 @ AES block
8k
+8 - result
4180 rev32
$h4.16b
, $rtmp_ctr.16b @ CTR block
8k
+19
4181 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
4183 aese
$ctr4b, $rk11 @ AES block
8k
+12 - round
11
4184 aese
$ctr2b, $rk11 @ AES block
8k
+10 - round
11
4185 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+19
4187 aese
$ctr7b, $rk11 @ AES block
8k
+15 - round
11
4188 aese
$ctr5b, $rk11 @ AES block
8k
+13 - round
11
4189 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
4191 eor3
$ctr1b, $res1b, $ctr1b, $rk12 @ AES block
8k
+9 - result
4192 stp
$ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
4193 eor3
$ctr3b, $res3b, $ctr3b, $rk12 @ AES block
8k
+11 - result
4195 eor3
$ctr2b, $res2b, $ctr2b, $rk12 @ AES block
8k
+10 - result
4196 eor3
$ctr7b, $res7b, $ctr7b, $rk12 @ AES block
8k
+15 - result
4197 stp
$ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
4199 eor3
$ctr5b, $res5b, $ctr5b, $rk12 @ AES block
8k
+13 - result
4200 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
4201 mov
$ctr3.16b
, $h4.16b @ CTR block
8k
+19
4203 eor3
$ctr4b, $res4b, $ctr4b, $rk12 @ AES block
8k
+12 - result
4204 stp
$ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
4205 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
4207 eor3
$ctr6b, $res6b, $ctr6b, $rk12 @ AES block
8k
+14 - result
4208 stp
$ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
4209 mov
$ctr0.16b
, $h1.16b @ CTR block
8k
+16
4211 mov
$ctr1.16b
, $h2.16b @ CTR block
8k
+17
4212 mov
$ctr2.16b
, $h3.16b @ CTR block
8k
+18
4214 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
8k
+20
4215 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+20
4216 b
.lt .L192_dec_main_loop
4218 .L192_dec_prepretail
: @ PREPRETAIL
4219 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
4220 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
4221 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
4223 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
4224 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
4225 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
4226 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
4227 rev64
$res0b, $res0b @ GHASH block
8k
4228 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
4230 rev64
$res3b, $res3b @ GHASH block
8k
+3
4231 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
4232 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
4234 eor
$res0b, $res0b, $acc_lb @ PRE
1
4235 rev64
$res2b, $res2b @ GHASH block
8k
+2
4236 rev64
$res1b, $res1b @ GHASH block
8k
+1
4238 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
4239 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
4240 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
4241 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
4242 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
4244 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
4245 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
4246 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
4248 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
4249 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
4250 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
4252 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
4253 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
4254 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
4256 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
4257 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
4258 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
4260 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
4261 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
4262 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
4264 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
4265 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
4266 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
4268 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
4269 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
4270 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
4272 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
4273 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
4274 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
4276 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
4277 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
4278 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
4280 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
4281 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
4282 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
4283 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
4285 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
4286 rev64
$res5b, $res5b @ GHASH block
8k
+5
4287 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
4289 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
4290 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
4291 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
4293 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
4294 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
4295 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
4297 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
4298 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
4299 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
4301 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
4302 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
4303 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
4305 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
4306 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
4307 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
4309 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
4310 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
4311 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
4313 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
4314 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
4315 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
4317 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
4318 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
4319 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
4320 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
4321 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
4322 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
4324 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
4325 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
4326 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
4327 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
4328 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
4329 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
4331 rev64
$res7b, $res7b @ GHASH block
8k
+7
4333 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
4334 rev64
$res4b, $res4b @ GHASH block
8k
+4
4336 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
4337 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
4338 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
4340 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
4341 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
4342 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
4344 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
4345 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
4346 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
4348 rev64
$res6b, $res6b @ GHASH block
8k
+6
4349 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
4350 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
4351 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
4353 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
4354 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
4355 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
4357 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
4358 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
4359 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
4361 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
4362 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
4363 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
4365 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
4367 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
4368 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
4369 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
4371 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
4372 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
4373 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
4375 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
4376 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
4377 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
4379 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
4380 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
4382 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
4383 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
4384 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
4386 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
4387 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
4388 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
4390 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
4391 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
4392 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
4394 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
4395 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
4396 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
4398 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
4399 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
4400 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
4402 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
4403 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
4404 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
4406 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
4407 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
4408 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
4410 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
4411 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
4412 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
4414 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
4415 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
4416 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
4418 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
4419 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
4421 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
4422 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
4423 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
4425 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
4426 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
4427 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
4429 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
4430 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
4431 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
4433 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
4434 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
4435 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
4437 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
4438 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
4439 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
4441 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
4442 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
4443 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
4445 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
4446 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
4447 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
4449 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
4450 ldr
$rk12q, [$cc, #192] @ load rk12
4451 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
4453 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
4454 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
4455 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
4457 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
4458 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
4459 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
4461 aese
$ctr0b, $rk11 @ AES block
8k
+8 - round
11
4462 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
4463 aese
$ctr5b, $rk11 @ AES block
8k
+13 - round
11
4465 aese
$ctr2b, $rk11 @ AES block
8k
+10 - round
11
4466 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
4467 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
4469 aese
$ctr6b, $rk11 @ AES block
8k
+14 - round
11
4470 aese
$ctr4b, $rk11 @ AES block
8k
+12 - round
11
4471 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
4473 aese
$ctr3b, $rk11 @ AES block
8k
+11 - round
11
4474 aese
$ctr1b, $rk11 @ AES block
8k
+9 - round
11
4475 aese
$ctr7b, $rk11 @ AES block
8k
+15 - round
11
4477 .L192_dec_tail
: @ TAIL
4479 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
4481 ldp
$h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h
4482 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
4483 ldr
$res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext
4485 ldp
$h78kq, $h8q, [$current_tag, #192] @ load h8l | h8h
4486 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
4490 ldp
$h6q, $h7q, [$current_tag, #160] @ load h6l | h6h
4491 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
4492 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
4493 ext
$t0.16b
, $acc_lb, $acc_lb, #8 @ prepare final partial tag
4495 eor3
$res4b, $res1b, $ctr0b, $t1.16b @ AES block
8k
+8 - result
4496 cmp $main_end_input_ptr, #112
4497 b
.gt .L192_dec_blocks_more_than_7
4501 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4507 cmp $main_end_input_ptr, #96
4513 b
.gt .L192_dec_blocks_more_than_6
4522 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4523 cmp $main_end_input_ptr, #80
4524 b
.gt .L192_dec_blocks_more_than_5
4531 cmp $main_end_input_ptr, #64
4533 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4534 b
.gt .L192_dec_blocks_more_than_4
4536 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4541 cmp $main_end_input_ptr, #48
4542 b
.gt .L192_dec_blocks_more_than_3
4544 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4546 cmp $main_end_input_ptr, #32
4549 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
4550 b
.gt .L192_dec_blocks_more_than_2
4552 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4555 cmp $main_end_input_ptr, #16
4556 b
.gt .L192_dec_blocks_more_than_1
4558 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
4559 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
4560 b
.L192_dec_blocks_less_than_1
4561 .L192_dec_blocks_more_than_7
: @ blocks left
> 7
4562 rev64
$res0b, $res1b @ GHASH final
-7 block
4564 ins
$acc_m.d
[0], $h78k.d
[1] @ GHASH final
-7 block
- mid
4565 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4567 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH final
-7 block
- high
4568 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-7 block
- mid
4569 ldr
$res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext
4571 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH final
-7 block
- low
4573 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-7 block
- mid
4574 st1
{ $res4b}, [$output_ptr], #16 @ AES final-7 block - store result
4576 eor3
$res4b, $res1b, $ctr1b, $t1.16b @ AES final
-6 block
- result
4578 pmull
$acc_m.1q
, $rk4v.1d
, $acc_m.1d @ GHASH final
-7 block
- mid
4579 movi
$t0.8b
, #0 @ supress further partial tag feed in
4580 .L192_dec_blocks_more_than_6
: @ blocks left
> 6
4582 rev64
$res0b, $res1b @ GHASH final
-6 block
4584 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4586 ldr
$res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext
4587 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-6 block
- mid
4589 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-6 block
- mid
4590 movi
$t0.8b
, #0 @ supress further partial tag feed in
4591 pmull2
$rk2q1, $res0.2d
, $h7.2d @ GHASH final
-6 block
- high
4593 st1
{ $res4b}, [$output_ptr], #16 @ AES final-6 block - store result
4594 eor3
$res4b, $res1b, $ctr2b, $t1.16b @ AES final
-5 block
- result
4596 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-6 block
- high
4597 pmull
$rk4v.1q
, $rk4v.1d
, $h78k.1d @ GHASH final
-6 block
- mid
4598 pmull
$rk3q1, $res0.1d
, $h7.1d @ GHASH final
-6 block
- low
4600 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-6 block
- mid
4601 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-6 block
- low
4602 .L192_dec_blocks_more_than_5
: @ blocks left
> 5
4604 rev64
$res0b, $res1b @ GHASH final
-5 block
4606 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4608 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-5 block
- mid
4610 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-5 block
- mid
4612 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-5 block
- mid
4613 pmull2
$rk2q1, $res0.2d
, $h6.2d @ GHASH final
-5 block
- high
4615 ldr
$res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext
4617 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-5 block
- high
4618 pmull
$rk3q1, $res0.1d
, $h6.1d @ GHASH final
-5 block
- low
4620 pmull2
$rk4v.1q
, $rk4v.2d
, $h56k.2d @ GHASH final
-5 block
- mid
4622 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-5 block
- low
4623 movi
$t0.8b
, #0 @ supress further partial tag feed in
4624 st1
{ $res4b}, [$output_ptr], #16 @ AES final-5 block - store result
4626 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-5 block
- mid
4627 eor3
$res4b, $res1b, $ctr3b, $t1.16b @ AES final
-4 block
- result
4628 .L192_dec_blocks_more_than_4
: @ blocks left
> 4
4630 rev64
$res0b, $res1b @ GHASH final
-4 block
4632 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4633 movi
$t0.8b
, #0 @ supress further partial tag feed in
4635 ldr
$res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext
4636 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-4 block
- mid
4637 pmull
$rk3q1, $res0.1d
, $h5.1d @ GHASH final
-4 block
- low
4639 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-4 block
- mid
4641 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-4 block
- low
4643 pmull
$rk4v.1q
, $rk4v.1d
, $h56k.1d @ GHASH final
-4 block
- mid
4644 st1
{ $res4b}, [$output_ptr], #16 @ AES final-4 block - store result
4645 pmull2
$rk2q1, $res0.2d
, $h5.2d @ GHASH final
-4 block
- high
4647 eor3
$res4b, $res1b, $ctr4b, $t1.16b @ AES final
-3 block
- result
4649 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-4 block
- mid
4650 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-4 block
- high
4651 .L192_dec_blocks_more_than_3
: @ blocks left
> 3
4653 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
4654 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
4655 rev64
$res0b, $res1b @ GHASH final
-3 block
4656 ldr
$res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext
4658 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4660 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-3 block
- mid
4661 pmull2
$rk2q1, $res0.2d
, $h4.2d @ GHASH final
-3 block
- high
4663 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-3 block
- high
4664 movi
$t0.8b
, #0 @ supress further partial tag feed in
4665 pmull
$rk3q1, $res0.1d
, $h4.1d @ GHASH final
-3 block
- low
4667 st1
{ $res4b}, [$output_ptr], #16 @ AES final-3 block - store result
4668 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-3 block
- mid
4669 eor3
$res4b, $res1b, $ctr5b, $t1.16b @ AES final
-2 block
- result
4671 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-3 block
- low
4672 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
4674 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-3 block
- mid
4676 pmull2
$rk4v.1q
, $rk4v.2d
, $h34k.2d @ GHASH final
-3 block
- mid
4678 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-3 block
- mid
4679 .L192_dec_blocks_more_than_2
: @ blocks left
> 2
4681 rev64
$res0b, $res1b @ GHASH final
-2 block
4682 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
4683 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
4685 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4687 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-2 block
- mid
4688 ldr
$res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext
4690 pmull2
$rk2q1, $res0.2d
, $h3.2d @ GHASH final
-2 block
- high
4692 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-2 block
- mid
4694 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-2 block
- high
4695 pmull
$rk3q1, $res0.1d
, $h3.1d @ GHASH final
-2 block
- low
4697 pmull
$rk4v.1q
, $rk4v.1d
, $h34k.1d @ GHASH final
-2 block
- mid
4698 movi
$t0.8b
, #0 @ supress further partial tag feed in
4700 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-2 block
- low
4701 st1
{ $res4b}, [$output_ptr], #16 @ AES final-2 block - store result
4703 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-2 block
- mid
4704 eor3
$res4b, $res1b, $ctr6b, $t1.16b @ AES final
-1 block
- result
4705 .L192_dec_blocks_more_than_1
: @ blocks left
> 1
4707 rev64
$res0b, $res1b @ GHASH final
-1 block
4708 ldr
$res1q, [$input_ptr], #16 @ AES final block - load ciphertext
4709 ldr
$h2q, [$current_tag, #64] @ load h1l | h1h
4710 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
4712 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4713 movi
$t0.8b
, #0 @ supress further partial tag feed in
4714 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
4716 pmull
$rk3q1, $res0.1d
, $h2.1d @ GHASH final
-1 block
- low
4717 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-1 block
- mid
4718 st1
{ $res4b}, [$output_ptr], #16 @ AES final-1 block - store result
4720 pmull2
$rk2q1, $res0.2d
, $h2.2d @ GHASH final
-1 block
- high
4722 eor3
$res4b, $res1b, $ctr7b, $t1.16b @ AES final block
- result
4724 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-1 block
- mid
4726 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-1 block
- mid
4728 pmull2
$rk4v.1q
, $rk4v.2d
, $h12k.2d @ GHASH final
-1 block
- mid
4730 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-1 block
- low
4732 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-1 block
- mid
4733 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-1 block
- high
4734 .L192_dec_blocks_less_than_1
: @ blocks left
<= 1
4736 rev32
$rtmp_ctr.16b
, $rtmp_ctr.16b
4737 and $bit_length, $bit_length, #127 @ bit_length %= 128
4739 sub $bit_length, $bit_length, #128 @ bit_length -= 128
4740 str
$rtmp_ctrq, [$counter] @ store the updated counter
4742 neg
$bit_length, $bit_length @ bit_length
= 128 - #bits in input (in range [1,128])
4743 mvn
$temp0_x, xzr @ temp0_x
= 0xffffffffffffffff
4745 and $bit_length, $bit_length, #127 @ bit_length %= 128
4747 mvn
$temp1_x, xzr @ temp1_x
= 0xffffffffffffffff
4748 lsr
$temp0_x, $temp0_x, $bit_length @ temp0_x is mask
for top
64b of
last block
4749 cmp $bit_length, #64
4751 csel
$temp2_x, $temp1_x, $temp0_x, lt
4752 csel
$temp3_x, $temp0_x, xzr
, lt
4753 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
4754 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
4756 mov
$ctr0.d
[1], $temp3_x
4757 ld1
{ $rk0}, [$output_ptr] @ load existing bytes where the possibly partial
last block is to be stored
4759 mov
$ctr0.d
[0], $temp2_x @ ctr0b is mask
for last block
4761 and $res1b, $res1b, $ctr0b @ possibly partial
last block has zeroes
in highest bits
4762 bif
$res4b, $rk0, $ctr0b @ insert existing bytes
in top end of result before storing
4764 rev64
$res0b, $res1b @ GHASH final block
4766 st1
{ $res4b}, [$output_ptr] @ store all
16B
4768 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
4770 ins
$t0.d
[0], $res0.d
[1] @ GHASH final block
- mid
4771 pmull
$rk3q1, $res0.1d
, $h1.1d @ GHASH final block
- low
4773 eor
$t0.8b
, $t0.8b
, $res0.8b @ GHASH final block
- mid
4774 pmull2
$rk2q1, $res0.2d
, $h1.2d @ GHASH final block
- high
4775 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final block
- low
4777 pmull
$t0.1q
, $t0.1d
, $h12k.1d @ GHASH final block
- mid
4778 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final block
- high
4780 eor
$t10.16b
, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
4781 eor
$acc_mb, $acc_mb, $t0.16b @ GHASH final block
- mid
4782 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
4784 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
4785 ext
$acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
4787 eor
$acc_mb, $acc_mb, $t10.16b @ MODULO
- karatsuba tidy up
4789 eor3
$acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO
- fold into mid
4791 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
4792 ext
$acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
4794 eor3
$acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO
- fold into low
4795 ext
$acc_lb, $acc_lb, $acc_lb, #8
4796 rev64
$acc_lb, $acc_lb
4797 st1
{ $acc_l.16b
}, [$current_tag]
4799 ldp d10
, d11
, [sp
, #16]
4800 ldp d12
, d13
, [sp
, #32]
4801 ldp d14
, d15
, [sp
, #48]
4802 ldp d8
, d9
, [sp
], #80
4808 .size unroll8_eor3_aes_gcm_dec_192_kernel
,.-unroll8_eor3_aes_gcm_dec_192_kernel
# Register aliases interpolated into the assembly text of the kernel that
# follows. Each variable holds the textual name of an AArch64 register; the
# suffix on the Perl name selects the register view that is spelled out:
#   (none) -> "vN", b -> "vN.16b", d -> "dN", q -> "qN", _x -> "xN".

# General-purpose registers: x4..x7 for the input end pointers and two
# temporaries, x13..x14 for two further temporaries.
4814 my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
4815 my ($temp2_x,$temp3_x)=map("x$_",(13..14));
# Vector registers 0..7 are the eight counter blocks (ctr0..ctr7) and
# 8..15 are the eight result blocks (res0..res7), in every view.
4816 my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
4817 my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
4818 my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
4819 my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
4820 my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));
# ctr_t0..ctr_t7 name registers 8..15 as well, i.e. the SAME registers as
# res0..res7: a ctr_tN value and the matching resN value overwrite each other.
4822 my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
4823 my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
4824 my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));
# GHASH accumulators (high / mid / low) live in v17 / v18 / v19.
4826 my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
4827 my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));
# Hash-key registers. The h1..h4 group and the h5..h8 group BOTH map onto
# v20..v25 (h1/h5 -> v20, h12k/h56k -> v21, h2/h6 -> v22, h3/h7 -> v23,
# h34k/h78k -> v24, h4/h8 -> v25), so only one group can be resident at a
# time and loading one group overwrites the other.
4829 my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
4830 my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25));
4831 my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25));
4832 my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25));
# q/d views used when storing the counter and for the counter increment.
# NOTE(review): the matching "v" names ($rtmp_ctr, $rctr_inc) are used by the
# kernel text but are not declared in the lines visible here - confirm they
# are declared alongside these.
4854 my $rtmp_ctrq="q30";
4856 my $rctr_incd="d31";
# The modulo constant shares a register with $t0.
# NOTE(review): $t0 / $t0d are not declared in the lines visible here; they
# must already be in scope at this point - confirm.
4858 my $mod_constantd=$t0d;
4859 my $mod_constant=$t0;
# Round keys rk0..rk14 are declared in groups of three that ALL map onto
# v26..v28 (rk0/rk3/rk6/rk9/rk12 -> v26, rk1/rk4/rk7/rk10/rk13 -> v27,
# rk2/rk5/rk8/rk11/rk14 -> v28). At most three round keys are therefore
# resident at once; the kernel text reloads them from [$cc, #offset] with
# ldp/ldr as the AES rounds advance.
4861 my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28));
4862 my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28));
4863 my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28));
4864 my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28));
4865 my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28));
4866 my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28));
4867 my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28));
4868 my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28));
4869 my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28));
4870 my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28));
4874 #########################################################################################
4875 # size_t unroll8_eor3_aes_gcm_enc_256_kernel(const unsigned char *in,
4877 # unsigned char *out,
4879 # unsigned char ivec[16],
4883 .global unroll8_eor3_aes_gcm_enc_256_kernel
4884 .type unroll8_eor3_aes_gcm_enc_256_kernel
,%function
4886 unroll8_eor3_aes_gcm_enc_256_kernel
:
4887 AARCH64_VALID_CALL_TARGET
4888 cbz x1
, .L256_enc_ret
4889 stp d8
, d9
, [sp
, #-80]!
4892 stp d10
, d11
, [sp
, #16]
4893 stp d12
, d13
, [sp
, #32]
4894 stp d14
, d15
, [sp
, #48]
4895 mov x5
, #0xc200000000000000
4896 stp x5
, xzr
, [sp
, #64]
4897 add
$modulo_constant, sp
, #64
4899 ld1
{ $ctr0b}, [$counter] @ CTR block
0
4901 lsr
$main_end_input_ptr, $bit_length, #3 @ byte_len
4903 mov
$constant_temp, #0x100000000 @ set up counter increment
4904 movi
$rctr_inc.16b
, #0x0
4905 mov
$rctr_inc.d
[1], $constant_temp
4906 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
4908 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
4910 add
$main_end_input_ptr, $main_end_input_ptr, $input_ptr
4912 rev32
$rtmp_ctr.16b
, $ctr0.16b @ set up reversed counter
4914 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
0
4916 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
1
4917 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
1
4919 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
2
4920 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
2
4922 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
3
4923 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
3
4925 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
4
4926 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
4
4928 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
5
4929 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
5
4930 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
4932 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
6
4933 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
6
4935 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
7
4937 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
0
4938 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
0
4939 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
0
4941 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
0
4942 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
0
4943 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
0
4945 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
0
4946 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
0
4947 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
4949 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
1
4950 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
1
4951 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
1
4953 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
1
4954 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
1
4956 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
1
4958 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
1
4960 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
2
4961 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
2
4962 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
1
4964 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
2
4965 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
2
4966 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
2
4968 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
2
4969 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
2
4970 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
2
4972 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
3
4973 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
3
4974 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
4976 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
3
4978 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
3
4979 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
3
4980 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
3
4982 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
3
4983 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
3
4985 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
4
4986 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
4
4987 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
4
4989 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
4
4990 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
4
4992 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
4
4993 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
4
4994 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
4
4996 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
5
4997 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
5
4998 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
5000 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
5
5001 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
5
5002 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
5
5004 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
5
5005 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
5
5006 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
5
5008 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
6
5009 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
6
5010 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
6
5012 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
6
5013 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
6
5014 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
6
5016 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
6
5017 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
6
5018 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
5020 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
7
5021 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
7
5023 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
7
5024 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
7
5025 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
7
5027 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
7
5028 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
7
5030 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
7
5032 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
8
5033 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
8
5035 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
8
5036 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
8
5037 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
8
5039 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
8
5040 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
8
5041 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
8
5043 ld1
{ $acc_lb}, [$current_tag]
5044 ext
$acc_lb, $acc_lb, $acc_lb, #8
5045 rev64
$acc_lb, $acc_lb
5046 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
5048 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
9
5049 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
9
5050 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
9
5052 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
9
5053 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
9
5054 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
9
5056 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
9
5058 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
10
5059 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
10
5060 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
9
5062 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
10
5063 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
10
5064 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
10
5066 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
10
5067 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
10
5068 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
10
5070 aese
$ctr4b, $rk11 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
11
5071 ldp
$rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
5072 aese
$ctr5b, $rk11 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
11
5074 aese
$ctr2b, $rk11 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
11
5075 aese
$ctr6b, $rk11 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
11
5076 aese
$ctr1b, $rk11 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
11
5078 aese
$ctr0b, $rk11 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
11
5079 aese
$ctr3b, $rk11 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
11
5080 aese
$ctr7b, $rk11 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
11
5082 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
7
5083 ldr
$rk14q, [$cc, #224] @ load rk14
5085 aese
$ctr4b, $rk12 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
12
5086 aese
$ctr2b, $rk12 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
12
5087 aese
$ctr1b, $rk12 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
12
5089 aese
$ctr0b, $rk12 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
12
5090 aese
$ctr5b, $rk12 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
12
5091 aese
$ctr3b, $rk12 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
12
5093 aese
$ctr2b, $rk13 @ AES block
2 - round
13
5094 aese
$ctr1b, $rk13 @ AES block
1 - round
13
5095 aese
$ctr4b, $rk13 @ AES block
4 - round
13
5097 aese
$ctr6b, $rk12 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
12
5098 aese
$ctr7b, $rk12 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
12
5100 aese
$ctr0b, $rk13 @ AES block
0 - round
13
5101 aese
$ctr5b, $rk13 @ AES block
5 - round
13
5103 aese
$ctr6b, $rk13 @ AES block
6 - round
13
5104 aese
$ctr7b, $rk13 @ AES block
7 - round
13
5105 aese
$ctr3b, $rk13 @ AES block
3 - round
13
5107 add
$end_input_ptr, $input_ptr, $bit_length, lsr
#3 @ end_input_ptr
5108 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
5109 b
.ge .L256_enc_tail @ handle tail
5111 ldp
$ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext
5113 ldp
$ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext
5115 eor3
$res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block
0 - result
5116 rev32
$ctr0.16b
, $rtmp_ctr.16b @ CTR block
8
5117 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8
5119 eor3
$res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block
1 - result
5120 eor3
$res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block
3 - result
5122 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
9
5123 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
9
5124 ldp
$ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext
5126 ldp
$ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext
5127 eor3
$res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block
2 - result
5128 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
5130 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
10
5131 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
10
5132 stp
$res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result
5134 stp
$res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result
5136 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
11
5137 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
11
5139 eor3
$res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block
4 - result
5141 eor3
$res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block
7 - result
5142 eor3
$res6b, $ctr_t6b, $ctr6b, $rk14 @ AES block
6 - result
5143 eor3
$res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block
5 - result
5145 stp
$res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result
5146 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
12
5148 stp
$res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result
5149 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
12
5150 b
.ge .L256_enc_prepretail @
do prepretail
5152 .L256_enc_main_loop
: @ main
loop start
5153 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
5155 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
5156 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
5157 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
5158 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
5160 rev64
$res3b, $res3b @ GHASH block
8k
+3
5161 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
5162 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
5163 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
5164 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
5165 rev64
$res1b, $res1b @ GHASH block
8k
+1
5167 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
5168 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
5169 rev64
$res0b, $res0b @ GHASH block
8k
5171 rev64
$res4b, $res4b @ GHASH block
8k
+4
5172 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
5173 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
5174 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
5175 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
5176 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
5178 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
5179 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
5180 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
5182 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
5183 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
5184 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
5186 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
5187 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
5188 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
5190 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
5191 eor
$res0b, $res0b, $acc_lb @ PRE
1
5192 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
5194 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
5195 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
5196 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
5198 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
5199 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
5200 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
5202 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
5203 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
5204 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
5206 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
5207 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
5208 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
5210 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
5211 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
5212 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
5214 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
5215 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
5216 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
5218 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
5219 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
5220 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
5222 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
5223 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
5224 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
5226 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
5227 rev64
$res6b, $res6b @ GHASH block
8k
+6
5228 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
5230 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
5231 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
5232 rev64
$res2b, $res2b @ GHASH block
8k
+2
5234 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
5235 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
5236 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
5238 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
5239 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
5240 rev64
$res5b, $res5b @ GHASH block
8k
+5
5242 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
5243 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
5244 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
5245 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
5246 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
5247 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
5249 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
5250 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
5251 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
5253 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
5254 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
5255 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
5257 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
5258 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
5259 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
5261 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
5262 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
5263 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
5265 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
5266 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
5267 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
5269 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
5270 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
5271 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
5273 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
5274 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
5275 rev64
$res7b, $res7b @ GHASH block
8k
+7
5277 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
5278 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
5279 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
5281 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
5282 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
5283 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
5285 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
5286 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
5287 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
5289 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
5290 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
5291 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
5293 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
5294 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
5295 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
5297 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
5298 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
5299 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
5301 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
5302 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
5303 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
5305 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
5306 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
5307 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
5308 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
5309 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
5310 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
5312 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
5313 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
5314 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
5315 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
5317 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
5318 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
5319 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
5321 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
5322 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
5323 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
5325 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
5326 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
5327 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
5329 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
5330 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
5331 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
5333 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
5334 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
5335 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
5337 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
5338 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
5339 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
5341 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
5342 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
5343 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
5345 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
5346 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
5347 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
5349 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
5350 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
5351 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
5353 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
5354 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
5355 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
5357 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
5358 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
5359 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
5361 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
5362 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
5363 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
5365 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
5366 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
5367 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
5369 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
5371 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
5372 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
5373 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
5375 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
5376 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
5377 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
5379 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
5380 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
5381 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
5383 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
5384 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
5385 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
5387 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
5389 ldp
$rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
5390 rev32
$h1.16b
, $rtmp_ctr.16b @ CTR block
8k
+16
5392 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
5393 ldp
$ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext
5394 aese
$ctr2b, $rk11 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
11
5396 aese
$ctr6b, $rk11 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
11
5397 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+16
5398 aese
$ctr3b, $rk11 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
11
5400 aese
$ctr0b, $rk11 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
11
5401 aese
$ctr7b, $rk11 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
11
5403 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
5404 aese
$ctr1b, $rk11 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
11
5406 aese
$ctr7b, $rk12 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
12
5407 aese
$ctr5b, $rk11 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
11
5409 aese
$ctr3b, $rk12 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
12
5410 aese
$ctr6b, $rk12 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
12
5411 rev32
$h2.16b
, $rtmp_ctr.16b @ CTR block
8k
+17
5413 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+17
5414 aese
$ctr4b, $rk11 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
11
5415 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
5417 aese
$ctr5b, $rk12 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
12
5418 ldr
$rk14q, [$cc, #224] @ load rk14
5419 aese
$ctr7b, $rk13 @ AES block
8k
+15 - round
13
5421 ldp
$ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext
5422 aese
$ctr2b, $rk12 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
12
5423 aese
$ctr4b, $rk12 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
12
5425 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
5426 aese
$ctr1b, $rk12 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
12
5427 ldp
$ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext
5429 ldp
$ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext
5430 aese
$ctr2b, $rk13 @ AES block
8k
+10 - round
13
5431 aese
$ctr4b, $rk13 @ AES block
8k
+12 - round
13
5433 rev32
$h3.16b
, $rtmp_ctr.16b @ CTR block
8k
+18
5434 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+18
5435 aese
$ctr5b, $rk13 @ AES block
8k
+13 - round
13
5437 aese
$ctr0b, $rk12 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
12
5438 aese
$ctr3b, $rk13 @ AES block
8k
+11 - round
13
5439 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
5441 eor3
$res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block
8k
+10 - result
5442 rev32
$h4.16b
, $rtmp_ctr.16b @ CTR block
8k
+19
5443 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+19
5445 aese
$ctr0b, $rk13 @ AES block
8k
+8 - round
13
5446 aese
$ctr6b, $rk13 @ AES block
8k
+14 - round
13
5447 eor3
$res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block
5 - result
5449 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
5450 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
5451 aese
$ctr1b, $rk13 @ AES block
8k
+9 - round
13
5453 eor3
$res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block
4 - result
5454 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
8k
+20
5455 eor3
$res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block
8k
+11 - result
5457 mov
$ctr3.16b
, $h4.16b @ CTR block
8k
+19
5458 eor3
$res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block
8k
+9 - result
5459 eor3
$res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block
8k
+8 - result
5461 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+20
5462 stp
$res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
5463 mov
$ctr2.16b
, $h3.16b @ CTR block
8k
+18
5465 eor3
$res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block
7 - result
5466 eor3
$acc_lb, $acc_lb, $t11.16b
, $acc_hb @ MODULO
- fold into low
5467 stp
$res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
5469 eor3
$res6b, $ctr_t6b, $ctr6b, $rk14 @ AES block
6 - result
5470 mov
$ctr1.16b
, $h2.16b @ CTR block
8k
+17
5471 stp
$res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
5473 stp
$res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
5474 mov
$ctr0.16b
, $h1.16b @ CTR block
8k
+16
5475 b
.lt .L256_enc_main_loop
5477 .L256_enc_prepretail
: @ PREPRETAIL
5478 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
5479 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
5480 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
5482 rev64
$res2b, $res2b @ GHASH block
8k
+2
5484 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
5485 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
5487 rev64
$res5b, $res5b @ GHASH block
8k
+5
5488 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
5489 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
5491 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
5493 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
5494 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
5495 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
5497 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
5498 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
5500 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
5501 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
5502 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
5504 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
5505 rev64
$res0b, $res0b @ GHASH block
8k
5506 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
5508 rev64
$res1b, $res1b @ GHASH block
8k
+1
5509 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
5510 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
5512 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
5513 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
5514 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
5515 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
5516 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
5518 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
5519 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
5520 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
5521 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
5522 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
5523 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
5525 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
5526 eor
$res0b, $res0b, $acc_lb @ PRE
1
5528 rev64
$res3b, $res3b @ GHASH block
8k
+3
5529 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
5531 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
5532 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
5533 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
5535 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
5536 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
5537 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
5539 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
5540 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
5541 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
5543 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
5544 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
5545 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
5547 rev64
$res6b, $res6b @ GHASH block
8k
+6
5548 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
5549 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
5551 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
5552 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
5553 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
5555 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
5556 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
5558 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
5559 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
5560 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
5562 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
5563 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
5564 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
5566 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
5567 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
5568 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
5570 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
5571 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
5572 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
5574 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
5575 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
5576 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
5578 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
5579 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
5580 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
5582 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
5583 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
5584 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
5586 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
5587 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
5588 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
5590 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
5591 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
5592 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
5594 rev64
$res4b, $res4b @ GHASH block
8k
+4
5595 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
5596 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
5598 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
5599 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
5600 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
5602 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
5603 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
5604 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
5605 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
5606 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
5607 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
5609 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
5610 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
5612 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
5613 rev64
$res7b, $res7b @ GHASH block
8k
+7
5614 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
5616 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
5617 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
5618 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
5620 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
5621 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
5622 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
5624 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
5625 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
5626 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
5627 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
5629 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
5630 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
5631 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
5633 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
5634 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
5635 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
5636 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
5637 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
5638 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
5640 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
5641 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
5642 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
5644 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
5645 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
5647 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
5648 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
5649 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
5651 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
5652 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
5653 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
5655 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
5656 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
5657 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
5659 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
5660 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
5661 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
5663 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
5664 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
5665 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
5667 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
5668 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
5669 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
5671 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
5672 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
5673 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
5675 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
5676 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
5677 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
5679 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
5680 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
5681 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
5683 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
5684 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
5685 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
5687 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
5688 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
5689 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
5691 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
5692 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
5693 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
5695 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
5697 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
5698 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
5699 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
5701 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
5702 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
5704 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
5705 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
5706 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
5708 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
5709 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
5710 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
5712 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
5713 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
5714 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
5716 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
5717 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
5718 aese
$ctr7b, $rk11 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
11
5720 ldp
$rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
5721 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
5722 aese
$ctr2b, $rk11 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
11
5724 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
5725 aese
$ctr1b, $rk11 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
11
5726 aese
$ctr6b, $rk11 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
11
5728 aese
$ctr0b, $rk11 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
11
5729 aese
$ctr4b, $rk11 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
11
5730 aese
$ctr5b, $rk11 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
11
5732 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
5733 aese
$ctr3b, $rk11 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
11
5734 ldr
$rk14q, [$cc, #224] @ load rk14
5736 aese
$ctr1b, $rk12 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
12
5737 aese
$ctr2b, $rk12 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
12
5738 aese
$ctr0b, $rk12 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
12
5740 aese
$ctr6b, $rk12 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
12
5741 aese
$ctr5b, $rk12 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
12
5742 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
5744 aese
$ctr4b, $rk12 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
12
5745 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
5747 aese
$ctr3b, $rk12 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
12
5748 aese
$ctr7b, $rk12 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
12
5749 aese
$ctr0b, $rk13 @ AES block
8k
+8 - round
13
5751 eor3
$acc_lb, $acc_lb, $t11.16b
, $acc_hb @ MODULO
- fold into low
5752 aese
$ctr5b, $rk13 @ AES block
8k
+13 - round
13
5753 aese
$ctr1b, $rk13 @ AES block
8k
+9 - round
13
5755 aese
$ctr3b, $rk13 @ AES block
8k
+11 - round
13
5756 aese
$ctr4b, $rk13 @ AES block
8k
+12 - round
13
5757 aese
$ctr7b, $rk13 @ AES block
8k
+15 - round
13
5759 aese
$ctr2b, $rk13 @ AES block
8k
+10 - round
13
5760 aese
$ctr6b, $rk13 @ AES block
8k
+14 - round
13
5761 .L256_enc_tail
: @ TAIL
5763 ldp
$h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k, h8l | h8h
5764 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
5765 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
5767 ldr
$ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext
5769 ldp
$h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h, h6k | h5k
5770 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
5772 ext
$t0.16b
, $acc_lb, $acc_lb, #8 @ prepare final partial tag
5773 ldp
$h6q, $h7q, [$current_tag, #160] @ load h6l | h6h, h7l | h7h
5774 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
5775 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
5778 cmp $main_end_input_ptr, #112
5779 eor3
$res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block
8k
+8 - result
5780 b
.gt .L256_enc_blocks_more_than_7
5791 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5795 cmp $main_end_input_ptr, #96
5796 b
.gt .L256_enc_blocks_more_than_6
5800 cmp $main_end_input_ptr, #80
5806 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5807 b
.gt .L256_enc_blocks_more_than_5
5810 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5815 cmp $main_end_input_ptr, #64
5817 b
.gt .L256_enc_blocks_more_than_4
5819 cmp $main_end_input_ptr, #48
5824 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5825 b
.gt .L256_enc_blocks_more_than_3
5827 cmp $main_end_input_ptr, #32
5829 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
5832 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5833 b
.gt .L256_enc_blocks_more_than_2
5837 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5838 cmp $main_end_input_ptr, #16
5839 b
.gt .L256_enc_blocks_more_than_1
5841 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
5842 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
5843 b
.L256_enc_blocks_less_than_1
5844 .L256_enc_blocks_more_than_7
: @ blocks left
> 7
5845 st1
{ $res1b}, [$output_ptr], #16 @ AES final-7 block - store result
5847 rev64
$res0b, $res1b @ GHASH final
-7 block
5849 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
5851 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext
5853 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH final
-7 block
- high
5854 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-7 block
- mid
5855 ins
$acc_m.d
[0], $h78k.d
[1] @ GHASH final
-7 block
- mid
5857 movi
$t0.8b
, #0 @ suppress further partial tag feed in
5859 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-7 block
- mid
5860 eor3
$res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final
-6 block
- result
5862 pmull
$acc_m.1q
, $rk4v.1d
, $acc_m.1d @ GHASH final
-7 block
- mid
5863 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH final
-7 block
- low
5864 .L256_enc_blocks_more_than_6
: @ blocks left
> 6
5866 st1
{ $res1b}, [$output_ptr], #16 @ AES final-6 block - store result
5868 rev64
$res0b, $res1b @ GHASH final
-6 block
5870 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
5872 pmull
$rk3q1, $res0.1d
, $h7.1d @ GHASH final
-6 block
- low
5873 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-6 block
- mid
5874 pmull2
$rk2q1, $res0.2d
, $h7.2d @ GHASH final
-6 block
- high
5876 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext
5878 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-6 block
- low
5880 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-6 block
- mid
5882 pmull
$rk4v.1q
, $rk4v.1d
, $h78k.1d @ GHASH final
-6 block
- mid
5883 eor3
$res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final
-5 block
- result
5885 movi
$t0.8b
, #0 @ suppress further partial tag feed in
5887 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-6 block
- mid
5888 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-6 block
- high
5889 .L256_enc_blocks_more_than_5
: @ blocks left
> 5
5891 st1
{ $res1b}, [$output_ptr], #16 @ AES final-5 block - store result
5893 rev64
$res0b, $res1b @ GHASH final
-5 block
5895 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
5897 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-5 block
- mid
5899 pmull2
$rk2q1, $res0.2d
, $h6.2d @ GHASH final
-5 block
- high
5901 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-5 block
- high
5902 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-5 block
- mid
5904 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-5 block
- mid
5906 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext
5907 pmull
$rk3q1, $res0.1d
, $h6.1d @ GHASH final
-5 block
- low
5909 pmull2
$rk4v.1q
, $rk4v.2d
, $h56k.2d @ GHASH final
-5 block
- mid
5910 movi
$t0.8b
, #0 @ suppress further partial tag feed in
5911 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-5 block
- low
5913 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-5 block
- mid
5914 eor3
$res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final
-4 block
- result
5915 .L256_enc_blocks_more_than_4
: @ blocks left
> 4
5917 st1
{ $res1b}, [$output_ptr], #16 @ AES final-4 block - store result
5919 rev64
$res0b, $res1b @ GHASH final
-4 block
5921 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext
5923 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
5925 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-4 block
- mid
5926 pmull2
$rk2q1, $res0.2d
, $h5.2d @ GHASH final
-4 block
- high
5928 eor3
$res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final
-3 block
- result
5929 pmull
$rk3q1, $res0.1d
, $h5.1d @ GHASH final
-4 block
- low
5931 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-4 block
- mid
5932 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-4 block
- low
5934 pmull
$rk4v.1q
, $rk4v.1d
, $h56k.1d @ GHASH final
-4 block
- mid
5936 movi
$t0.8b
, #0 @ suppress further partial tag feed in
5938 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-4 block
- mid
5939 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-4 block
- high
5940 .L256_enc_blocks_more_than_3
: @ blocks left
> 3
5942 st1
{ $res1b}, [$output_ptr], #16 @ AES final-3 block - store result
5944 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
5945 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
5946 rev64
$res0b, $res1b @ GHASH final
-3 block
5948 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
5950 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-3 block
- mid
5951 pmull2
$rk2q1, $res0.2d
, $h4.2d @ GHASH final
-3 block
- high
5953 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-3 block
- high
5954 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-3 block
- mid
5955 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
5957 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-3 block
- mid
5958 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext
5960 pmull2
$rk4v.1q
, $rk4v.2d
, $h34k.2d @ GHASH final
-3 block
- mid
5961 pmull
$rk3q1, $res0.1d
, $h4.1d @ GHASH final
-3 block
- low
5963 eor3
$res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final
-2 block
- result
5964 movi
$t0.8b
, #0 @ suppress further partial tag feed in
5966 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-3 block
- mid
5967 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-3 block
- low
5968 .L256_enc_blocks_more_than_2
: @ blocks left
> 2
5970 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
5971 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
5973 st1
{ $res1b}, [$output_ptr], #16 @ AES final-2 block - store result
5975 rev64
$res0b, $res1b @ GHASH final
-2 block
5976 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext
5978 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
5980 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-2 block
- mid
5982 movi
$t0.8b
, #0 @ suppress further partial tag feed in
5984 pmull2
$rk2q1, $res0.2d
, $h3.2d @ GHASH final
-2 block
- high
5985 eor3
$res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final
-1 block
- result
5987 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-2 block
- mid
5989 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-2 block
- high
5991 pmull
$rk4v.1q
, $rk4v.1d
, $h34k.1d @ GHASH final
-2 block
- mid
5992 pmull
$rk3q1, $res0.1d
, $h3.1d @ GHASH final
-2 block
- low
5994 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-2 block
- mid
5995 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-2 block
- low
5996 .L256_enc_blocks_more_than_1
: @ blocks left
> 1
5998 st1
{ $res1b}, [$output_ptr], #16 @ AES final-1 block - store result
6000 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
6001 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
6002 rev64
$res0b, $res1b @ GHASH final
-1 block
6003 ldr
$ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext
6005 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
6006 movi
$t0.8b
, #0 @ suppress further partial tag feed in
6008 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-1 block
- mid
6009 pmull2
$rk2q1, $res0.2d
, $h2.2d @ GHASH final
-1 block
- high
6011 eor3
$res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block
- result
6012 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-1 block
- high
6014 pmull
$rk3q1, $res0.1d
, $h2.1d @ GHASH final
-1 block
- low
6015 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-1 block
- mid
6017 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
6019 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-1 block
- low
6020 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-1 block
- mid
6022 pmull2
$rk4v.1q
, $rk4v.2d
, $h12k.2d @ GHASH final
-1 block
- mid
6024 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-1 block
- mid
6025 .L256_enc_blocks_less_than_1
: @ blocks left
<= 1
6027 and $bit_length, $bit_length, #127 @ bit_length %= 128
6029 sub $bit_length, $bit_length, #128 @ bit_length -= 128
6031 neg
$bit_length, $bit_length @ bit_length
= 128 - #bits in input (in range [1,128])
6033 mvn
$temp0_x, xzr @ temp0_x
= 0xffffffffffffffff
6034 and $bit_length, $bit_length, #127 @ bit_length %= 128
6036 lsr
$temp0_x, $temp0_x, $bit_length @ temp0_x is mask
for top
64b of
last block
6037 cmp $bit_length, #64
6038 mvn
$temp1_x, xzr @ temp1_x
= 0xffffffffffffffff
6040 csel
$temp3_x, $temp0_x, xzr
, lt
6041 csel
$temp2_x, $temp1_x, $temp0_x, lt
6043 mov
$ctr0.d
[0], $temp2_x @ ctr0b is mask
for last block
6044 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
6045 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
6047 ld1
{ $rk0}, [$output_ptr] @ load existing bytes where the possibly partial
last block is to be stored
6048 mov
$ctr0.d
[1], $temp3_x
6050 and $res1b, $res1b, $ctr0b @ possibly partial
last block has zeroes
in highest bits
6052 rev64
$res0b, $res1b @ GHASH final block
6054 rev32
$rtmp_ctr.16b
, $rtmp_ctr.16b
6055 bif
$res1b, $rk0, $ctr0b @ insert existing bytes
in top end of result before storing
6056 str
$rtmp_ctrq, [$counter] @ store the updated counter
6058 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
6059 st1
{ $res1b}, [$output_ptr] @ store all
16B
6061 ins
$t0.d
[0], $res0.d
[1] @ GHASH final block
- mid
6062 pmull2
$rk2q1, $res0.2d
, $h1.2d @ GHASH final block
- high
6063 pmull
$rk3q1, $res0.1d
, $h1.1d @ GHASH final block
- low
6065 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final block
- high
6066 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final block
- low
6068 eor
$t0.8b
, $t0.8b
, $res0.8b @ GHASH final block
- mid
6070 pmull
$t0.1q
, $t0.1d
, $h12k.1d @ GHASH final block
- mid
6072 eor
$acc_mb, $acc_mb, $t0.16b @ GHASH final block
- mid
6073 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
6075 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
6077 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
6078 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
6080 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
6082 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
6083 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
6085 eor3
$acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO
- fold into low
6086 ext
$acc_lb, $acc_lb, $acc_lb, #8
6087 rev64
$acc_lb, $acc_lb
6088 st1
{ $acc_l.16b
}, [$current_tag]
6089 lsr x0
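, $bit_length, #3 @ return size in bytes; NOTE(review): bit_length was rewritten in place for the last-block mask above, so confirm this is the intended return value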
6091 ldp d10
, d11
, [sp
, #16]
6092 ldp d12
, d13
, [sp
, #32]
6093 ldp d14
, d15
, [sp
, #48]
6094 ldp d8
, d9
, [sp
], #80
6100 .size unroll8_eor3_aes_gcm_enc_256_kernel
,.-unroll8_eor3_aes_gcm_enc_256_kernel
6104 #########################################################################################
6105 # size_t unroll8_eor3_aes_gcm_dec_256_kernel(const unsigned char *in,
6107 # unsigned char *out,
6109 # unsigned char ivec[16],
6113 .global unroll8_eor3_aes_gcm_dec_256_kernel
6114 .type unroll8_eor3_aes_gcm_dec_256_kernel
,%function
6116 unroll8_eor3_aes_gcm_dec_256_kernel
:
6117 AARCH64_VALID_CALL_TARGET
6118 cbz x1
, .L256_dec_ret
6119 stp d8
, d9
, [sp
, #-80]!
6122 stp d10
, d11
, [sp
, #16]
6123 stp d12
, d13
, [sp
, #32]
6124 stp d14
, d15
, [sp
, #48]
6125 mov x5
, #0xc200000000000000
6126 stp x5
, xzr
, [sp
, #64]
6127 add
$modulo_constant, sp
, #64
6129 ld1
{ $ctr0b}, [$counter] @ CTR block
0
6131 mov
$constant_temp, #0x100000000 @ set up counter increment
6132 movi
$rctr_inc.16b
, #0x0
6133 mov
$rctr_inc.d
[1], $constant_temp
6134 lsr
$main_end_input_ptr, $bit_length, #3 @ byte_len
6136 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1
6138 rev32
$rtmp_ctr.16b
, $ctr0.16b @ set up reversed counter
6140 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
0
6142 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
1
6143 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
1
6145 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
2
6146 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
2
6147 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
6149 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
3
6150 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
3
6152 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
4
6153 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
4
6155 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
0
6157 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
5
6158 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
5
6160 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
0
6161 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
0
6163 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
6
6164 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
6
6166 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
7
6167 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
0
6169 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
0
6170 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
0
6172 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
0
6173 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
0
6174 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
6176 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
1
6177 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
1
6178 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
1
6180 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
1
6181 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
1
6182 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
1
6184 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
1
6185 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
1
6187 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
2
6188 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
2
6189 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
2
6191 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
2
6192 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
2
6193 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
2
6195 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
2
6196 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
2
6197 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
6199 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
3
6200 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
3
6202 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
3
6203 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
3
6205 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
3
6206 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
3
6207 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
3
6209 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
3
6211 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
4
6212 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
4
6214 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
4
6215 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
4
6216 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
4
6218 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
4
6219 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
4
6220 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
4
6222 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
5
6223 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
5
6225 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
6226 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
5
6227 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
5
6229 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
5
6231 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
5
6232 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
5
6234 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
5
6236 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
6
6237 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
6
6238 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
6
6240 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
6
6241 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
6
6242 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
6
6244 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
6
6245 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
6
6246 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
6248 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
7
6249 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
7
6251 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
7
6252 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
7
6253 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
7
6255 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
7
6256 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
7
6257 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
7
6259 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
6260 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
8
6261 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
8
6263 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
8
6264 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
8
6265 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
8
6267 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
8
6268 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
8
6269 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
8
6271 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
9
6273 ld1
{ $acc_lb}, [$current_tag]
6274 ext
$acc_lb, $acc_lb, $acc_lb, #8
6275 rev64
$acc_lb, $acc_lb
6276 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
6277 add
$end_input_ptr, $input_ptr, $bit_length, lsr
#3 @ end_input_ptr
6278 add
$main_end_input_ptr, $main_end_input_ptr, $input_ptr
6280 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
9
6281 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
9
6283 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
9
6284 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
9
6286 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
9
6288 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
9
6289 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
9
6291 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
10
6292 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
10
6293 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
10
6295 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
10
6296 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
10
6297 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
10
6299 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
10
6300 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
10
6301 ldp
$rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
6303 aese
$ctr0b, $rk11 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
11
6304 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
7
6306 aese
$ctr7b, $rk11 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
11
6307 aese
$ctr3b, $rk11 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
11
6308 aese
$ctr1b, $rk11 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
11
6310 aese
$ctr5b, $rk11 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
11
6311 aese
$ctr4b, $rk11 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
11
6312 aese
$ctr2b, $rk11 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
11
6314 aese
$ctr6b, $rk11 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
11
6315 ldr
$rk14q, [$cc, #224] @ load rk14
6317 aese
$ctr1b, $rk12 \n aesmc
$ctr1b, $ctr1b @ AES block
1 - round
12
6318 aese
$ctr4b, $rk12 \n aesmc
$ctr4b, $ctr4b @ AES block
4 - round
12
6319 aese
$ctr5b, $rk12 \n aesmc
$ctr5b, $ctr5b @ AES block
5 - round
12
6321 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
6322 aese
$ctr3b, $rk12 \n aesmc
$ctr3b, $ctr3b @ AES block
3 - round
12
6323 aese
$ctr2b, $rk12 \n aesmc
$ctr2b, $ctr2b @ AES block
2 - round
12
6325 aese
$ctr6b, $rk12 \n aesmc
$ctr6b, $ctr6b @ AES block
6 - round
12
6326 aese
$ctr0b, $rk12 \n aesmc
$ctr0b, $ctr0b @ AES block
0 - round
12
6327 aese
$ctr7b, $rk12 \n aesmc
$ctr7b, $ctr7b @ AES block
7 - round
12
6329 aese
$ctr5b, $rk13 @ AES block
5 - round
13
6330 aese
$ctr1b, $rk13 @ AES block
1 - round
13
6331 aese
$ctr2b, $rk13 @ AES block
2 - round
13
6333 aese
$ctr0b, $rk13 @ AES block
0 - round
13
6334 aese
$ctr4b, $rk13 @ AES block
4 - round
13
6335 aese
$ctr6b, $rk13 @ AES block
6 - round
13
6337 aese
$ctr3b, $rk13 @ AES block
3 - round
13
6338 aese
$ctr7b, $rk13 @ AES block
7 - round
13
6339 b
.ge .L256_dec_tail @ handle tail
6341 ldp
$res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext
6343 ldp
$res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext
6345 ldp
$res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext
6347 ldp
$res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext
6348 cmp $input_ptr, $main_end_input_ptr @ check
if we have
<= 8 blocks
6350 eor3
$ctr1b, $res1b, $ctr1b, $rk14 @ AES block
1 - result
6351 eor3
$ctr0b, $res0b, $ctr0b, $rk14 @ AES block
0 - result
6352 stp
$ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result
6354 rev32
$ctr0.16b
, $rtmp_ctr.16b @ CTR block
8
6355 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8
6356 eor3
$ctr3b, $res3b, $ctr3b, $rk14 @ AES block
3 - result
6358 eor3
$ctr5b, $res5b, $ctr5b, $rk14 @ AES block
5 - result
6360 eor3
$ctr4b, $res4b, $ctr4b, $rk14 @ AES block
4 - result
6361 rev32
$ctr1.16b
, $rtmp_ctr.16b @ CTR block
9
6362 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
9
6364 eor3
$ctr2b, $res2b, $ctr2b, $rk14 @ AES block
2 - result
6365 stp
$ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result
6367 rev32
$ctr2.16b
, $rtmp_ctr.16b @ CTR block
10
6368 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
10
6370 eor3
$ctr6b, $res6b, $ctr6b, $rk14 @ AES block
6 - result
6372 rev32
$ctr3.16b
, $rtmp_ctr.16b @ CTR block
11
6373 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
11
6374 stp
$ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result
6376 eor3
$ctr7b, $res7b, $ctr7b, $rk14 @ AES block
7 - result
6377 stp
$ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result
6379 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
12
6380 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
12
6381 b
.ge .L256_dec_prepretail @
do prepretail
6383 .L256_dec_main_loop
: @ main
loop start
6384 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
6385 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
6386 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
6388 rev64
$res1b, $res1b @ GHASH block
8k
+1
6389 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
6390 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
6391 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
6392 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
6394 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
6395 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
6396 rev64
$res0b, $res0b @ GHASH block
8k
6398 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
6399 rev64
$res4b, $res4b @ GHASH block
8k
+4
6400 rev64
$res3b, $res3b @ GHASH block
8k
+3
6402 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
6403 rev64
$res7b, $res7b @ GHASH block
8k
+7
6405 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
6406 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
6407 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
6409 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
6410 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
6411 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
6413 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
6414 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
6415 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
6417 eor
$res0b, $res0b, $acc_lb @ PRE
1
6418 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
6419 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
6420 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
6421 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
6422 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
6424 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
6425 rev64
$res2b, $res2b @ GHASH block
8k
+2
6426 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
6428 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
6429 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
6430 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
6432 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
6433 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
6434 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
6436 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
6437 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
6438 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
6440 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
6441 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
6442 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
6444 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
6445 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
6446 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
6448 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
6449 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
6450 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
6452 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
6453 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
6454 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
6456 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
6457 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
6458 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
6460 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
6461 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
6462 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
6464 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
6465 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
6466 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
6468 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
6469 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
6470 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
6472 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
6473 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
6474 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
6476 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
6477 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
6478 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
6480 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
6481 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
6482 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
6483 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
6485 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
6486 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
6487 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
6489 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
6490 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
6491 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
6493 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
6494 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
6495 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
6497 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
6498 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
6499 rev64
$res5b, $res5b @ GHASH block
8k
+5
6501 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
6502 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
6503 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
6505 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
6506 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
6507 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
6509 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
6510 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
6511 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
6513 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
6514 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
6515 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
6517 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
6518 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
6519 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
6521 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
6522 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
6523 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
6525 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
6526 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
6527 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
6528 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
6529 rev64
$res6b, $res6b @ GHASH block
8k
+6
6530 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
6532 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
6533 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
6534 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
6536 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
6537 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
6538 ldr
$h2q, [$current_tag, #64] @ load h1l | h1h
6539 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
6540 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
6541 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
6543 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
6544 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
6545 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
6547 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
6548 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
6549 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
6550 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
6552 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
6553 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
6554 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
6556 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
6557 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
6558 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
6560 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
6561 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
6562 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
6564 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
6565 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
6566 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
6568 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
6569 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
6570 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
6572 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
6573 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
6574 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
6576 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
6577 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
6578 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
6580 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
6581 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
6582 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
6584 ldp
$res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext
6585 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
6586 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
6588 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
6589 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
6590 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
6592 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
6593 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
6594 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
6596 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
6597 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
6598 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
6600 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
6601 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
6602 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
6604 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
6605 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
6606 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
6608 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
6609 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
6610 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
6612 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
6613 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
6614 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
6616 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
6617 rev32
$h1.16b
, $rtmp_ctr.16b @ CTR block
8k
+16
6618 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
6620 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+16
6621 aese
$ctr1b, $rk11 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
11
6622 ldp
$rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
6624 aese
$ctr0b, $rk11 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
11
6625 aese
$ctr6b, $rk11 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
11
6627 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
6628 rev32
$h2.16b
, $rtmp_ctr.16b @ CTR block
8k
+17
6629 aese
$ctr2b, $rk11 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
11
6631 ldp
$res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext
6632 aese
$ctr7b, $rk11 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
11
6633 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
6635 aese
$ctr5b, $rk11 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
11
6636 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+17
6637 aese
$ctr3b, $rk11 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
11
6639 aese
$ctr2b, $rk12 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
12
6640 aese
$ctr7b, $rk12 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
12
6641 aese
$ctr6b, $rk12 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
12
6643 rev32
$h3.16b
, $rtmp_ctr.16b @ CTR block
8k
+18
6644 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+18
6645 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
6647 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
6648 aese
$ctr1b, $rk12 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
12
6649 aese
$ctr4b, $rk11 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
11
6651 ldr
$rk14q, [$cc, #224] @ load rk14
6652 aese
$ctr5b, $rk12 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
12
6653 aese
$ctr3b, $rk12 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
12
6655 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
6656 aese
$ctr0b, $rk12 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
12
6657 aese
$ctr4b, $rk12 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
12
6659 ldp
$res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext
6660 aese
$ctr1b, $rk13 @ AES block
8k
+9 - round
13
6661 aese
$ctr2b, $rk13 @ AES block
8k
+10 - round
13
6663 ldp
$res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext
6664 aese
$ctr0b, $rk13 @ AES block
8k
+8 - round
13
6665 aese
$ctr5b, $rk13 @ AES block
8k
+13 - round
13
6667 rev32
$h4.16b
, $rtmp_ctr.16b @ CTR block
8k
+19
6668 eor3
$ctr2b, $res2b, $ctr2b, $rk14 @ AES block
8k
+10 - result
6669 eor3
$ctr1b, $res1b, $ctr1b, $rk14 @ AES block
8k
+9 - result
6671 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
6672 aese
$ctr7b, $rk13 @ AES block
8k
+15 - round
13
6674 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+19
6675 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
6676 aese
$ctr4b, $rk13 @ AES block
8k
+12 - round
13
6678 eor3
$ctr5b, $res5b, $ctr5b, $rk14 @ AES block
8k
+13 - result
6679 eor3
$ctr0b, $res0b, $ctr0b, $rk14 @ AES block
8k
+8 - result
6680 aese
$ctr3b, $rk13 @ AES block
8k
+11 - round
13
6682 stp
$ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result
6683 mov
$ctr0.16b
, $h1.16b @ CTR block
8k
+16
6684 eor3
$ctr4b, $res4b, $ctr4b, $rk14 @ AES block
8k
+12 - result
6686 eor3
$acc_lb, $acc_lb, $t11.16b
, $acc_hb @ MODULO
- fold into low
6687 eor3
$ctr3b, $res3b, $ctr3b, $rk14 @ AES block
8k
+11 - result
6688 stp
$ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result
6690 mov
$ctr3.16b
, $h4.16b @ CTR block
8k
+19
6691 mov
$ctr2.16b
, $h3.16b @ CTR block
8k
+18
6692 aese
$ctr6b, $rk13 @ AES block
8k
+14 - round
13
6694 mov
$ctr1.16b
, $h2.16b @ CTR block
8k
+17
6695 stp
$ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result
6696 eor3
$ctr7b, $res7b, $ctr7b, $rk14 @ AES block
8k
+15 - result
6698 eor3
$ctr6b, $res6b, $ctr6b, $rk14 @ AES block
8k
+14 - result
6699 rev32
$ctr4.16b
, $rtmp_ctr.16b @ CTR block
8k
+20
6700 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+20
6702 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL
6703 stp
$ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result
6704 b
.lt .L256_dec_main_loop
@ PREPRETAIL of the AES-256 decrypt path. Execution falls through to here
@ when the b.lt loop-control branch directly above is not taken; any other
@ entry points are not visible in this span.
@ What the visible code does:
@   - builds counter blocks $ctr5..$ctr7 from $rtmp_ctr (rev32, then add of
@     $rctr_inc) and runs AES rounds 0..13 (rk0..rk13) on $ctr0..$ctr7;
@     rk14 is loaded near the end but is not applied in this section.
@   - GHASHes the eight blocks held in $res0..$res7 with h8..h1 and the
@     h78k/h56k/h34k/h12k mid keys, after xoring the running tag from
@     $acc_l into block 8k.
@   - reduces the high/mid/low products with $mod_constant; the reduced
@     value is left in $acc_l (the final "fold into low" eor3).
6706 .L256_dec_prepretail
: @ PREPRETAIL
6707 ldp
$rk0q, $rk1q, [$cc, #0] @ load rk0, rk1
6708 rev32
$ctr5.16b
, $rtmp_ctr.16b @ CTR block
8k
+13
6709 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+13
6711 rev64
$res4b, $res4b @ GHASH block
8k
+4
6712 ldr
$h56kq, [$current_tag, #144] @ load h6k | h5k
6713 ldr
$h78kq, [$current_tag, #192] @ load h8k | h7k
6715 rev32
$ctr6.16b
, $rtmp_ctr.16b @ CTR block
8k
+14
6716 rev64
$res0b, $res0b @ GHASH block
8k
6717 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+14
@ Swap the two 64-bit halves of the running tag; the "PRE 1" eor further
@ down xors the result into block 8k before it is multiplied.
6719 ext
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0
6720 ldr
$h7q, [$current_tag, #176] @ load h7l | h7h
6721 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
6722 ldr
$h8q, [$current_tag, #208] @ load h8l | h8h
6723 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
6724 rev64
$res1b, $res1b @ GHASH block
8k
+1
6726 rev32
$ctr7.16b
, $rtmp_ctr.16b @ CTR block
8k
+15
6727 rev64
$res2b, $res2b @ GHASH block
8k
+2
6728 ldr
$h5q, [$current_tag, #128] @ load h5l | h5h
6729 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
6730 ldr
$h6q, [$current_tag, #160] @ load h6l | h6h
6731 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
6733 aese
$ctr0b, $rk0 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
0
6734 aese
$ctr1b, $rk0 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
0
6735 aese
$ctr4b, $rk0 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
0
6737 aese
$ctr3b, $rk0 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
0
6738 aese
$ctr5b, $rk0 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
0
6739 aese
$ctr6b, $rk0 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
0
6741 aese
$ctr4b, $rk1 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
1
6742 aese
$ctr7b, $rk0 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
0
6743 aese
$ctr2b, $rk0 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
0
6745 ldp
$rk2q, $rk3q, [$cc, #32] @ load rk2, rk3
6746 aese
$ctr0b, $rk1 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
1
6747 eor
$res0b, $res0b, $acc_lb @ PRE
1
6749 aese
$ctr7b, $rk1 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
1
6750 aese
$ctr6b, $rk1 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
1
6751 aese
$ctr2b, $rk1 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
1
6753 aese
$ctr3b, $rk1 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
1
6754 aese
$ctr1b, $rk1 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
1
6755 aese
$ctr5b, $rk1 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
1
6757 pmull2
$t0.1q
, $res1.2d
, $h7.2d @ GHASH block
8k
+1 - high
6758 trn1
$acc_m.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
6759 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH block
8k
- low
6761 rev64
$res3b, $res3b @ GHASH block
8k
+3
6762 pmull
$h7.1q
, $res1.1d
, $h7.1d @ GHASH block
8k
+1 - low
6764 aese
$ctr5b, $rk2 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
2
6765 aese
$ctr7b, $rk2 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
2
6766 aese
$ctr1b, $rk2 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
2
6768 aese
$ctr3b, $rk2 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
2
6769 aese
$ctr6b, $rk2 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
2
6770 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH block
8k
- high
6772 aese
$ctr0b, $rk2 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
2
6773 aese
$ctr7b, $rk3 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
3
6775 aese
$ctr5b, $rk3 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
3
6776 rev64
$res6b, $res6b @ GHASH block
8k
+6
6778 aese
$ctr0b, $rk3 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
3
6779 aese
$ctr2b, $rk2 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
2
6780 aese
$ctr6b, $rk3 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
3
6782 pmull2
$t1.1q
, $res2.2d
, $h6.2d @ GHASH block
8k
+2 - high
6783 trn2
$res0.2d
, $res1.2d
, $res0.2d @ GHASH block
8k
, 8k
+1 - mid
6784 aese
$ctr4b, $rk2 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
2
6786 ldp
$rk4q, $rk5q, [$cc, #64] @ load rk4, rk5
6787 aese
$ctr1b, $rk3 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
3
6788 pmull2
$t2.1q
, $res3.2d
, $h5.2d @ GHASH block
8k
+3 - high
6790 aese
$ctr2b, $rk3 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
3
6791 eor
$acc_hb, $acc_hb, $t0.16b @ GHASH block
8k
+1 - high
6792 eor
$res0.16b
, $res0.16b
, $acc_m.16b @ GHASH block
8k
, 8k
+1 - mid
6794 aese
$ctr4b, $rk3 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
3
6795 pmull
$h6.1q
, $res2.1d
, $h6.1d @ GHASH block
8k
+2 - low
6796 aese
$ctr3b, $rk3 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
3
6798 eor3
$acc_hb, $acc_hb, $t1.16b
, $t2.16b @ GHASH block
8k
+2, 8k
+3 - high
6799 trn1
$t3.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
6800 trn2
$res2.2d
, $res3.2d
, $res2.2d @ GHASH block
8k
+2, 8k
+3 - mid
6802 pmull2
$acc_m.1q
, $res0.2d
, $h78k.2d @ GHASH block
8k
- mid
6803 pmull
$h5.1q
, $res3.1d
, $h5.1d @ GHASH block
8k
+3 - low
6804 eor
$acc_lb, $acc_lb, $h7.16b @ GHASH block
8k
+1 - low
6806 pmull
$h78k.1q
, $res0.1d
, $h78k.1d @ GHASH block
8k
+1 - mid
6807 aese
$ctr5b, $rk4 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
4
6808 aese
$ctr0b, $rk4 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
4
6810 eor3
$acc_lb, $acc_lb, $h6.16b
, $h5.16b @ GHASH block
8k
+2, 8k
+3 - low
6811 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
6812 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
6813 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
6814 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
6815 aese
$ctr7b, $rk4 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
4
6817 aese
$ctr2b, $rk4 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
4
6818 aese
$ctr6b, $rk4 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
4
6819 eor
$acc_mb, $acc_mb, $h78k.16b @ GHASH block
8k
+1 - mid
6821 eor
$res2.16b
, $res2.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
6822 aese
$ctr7b, $rk5 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
5
6823 aese
$ctr1b, $rk4 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
4
6825 aese
$ctr2b, $rk5 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
5
6826 aese
$ctr3b, $rk4 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
4
6827 aese
$ctr4b, $rk4 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
4
6829 aese
$ctr1b, $rk5 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
5
6830 pmull2
$t3.1q
, $res2.2d
, $h56k.2d @ GHASH block
8k
+2 - mid
6831 aese
$ctr6b, $rk5 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
5
6833 aese
$ctr4b, $rk5 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
5
6834 aese
$ctr3b, $rk5 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
5
6835 pmull
$h56k.1q
, $res2.1d
, $h56k.1d @ GHASH block
8k
+3 - mid
6837 aese
$ctr0b, $rk5 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
5
6838 aese
$ctr5b, $rk5 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
5
6839 ldp
$rk6q, $rk7q, [$cc, #96] @ load rk6, rk7
6841 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
6842 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
6843 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
6844 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
6845 rev64
$res7b, $res7b @ GHASH block
8k
+7
6846 rev64
$res5b, $res5b @ GHASH block
8k
+5
6848 eor3
$acc_mb, $acc_mb, $h56k.16b
, $t3.16b @ GHASH block
8k
+2, 8k
+3 - mid
6850 trn1
$t6.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
6852 aese
$ctr0b, $rk6 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
6
6853 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
6854 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
6855 aese
$ctr6b, $rk6 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
6
6857 aese
$ctr5b, $rk6 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
6
6858 aese
$ctr7b, $rk6 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
6
6860 pmull2
$t4.1q
, $res4.2d
, $h4.2d @ GHASH block
8k
+4 - high
6861 pmull2
$t5.1q
, $res5.2d
, $h3.2d @ GHASH block
8k
+5 - high
6862 pmull
$h4.1q
, $res4.1d
, $h4.1d @ GHASH block
8k
+4 - low
6864 trn2
$res4.2d
, $res5.2d
, $res4.2d @ GHASH block
8k
+4, 8k
+5 - mid
6865 pmull
$h3.1q
, $res5.1d
, $h3.1d @ GHASH block
8k
+5 - low
6866 trn1
$t9.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
6868 aese
$ctr7b, $rk7 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
7
6869 pmull2
$t7.1q
, $res6.2d
, $h2.2d @ GHASH block
8k
+6 - high
6870 aese
$ctr1b, $rk6 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
6
6872 aese
$ctr2b, $rk6 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
6
6873 aese
$ctr3b, $rk6 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
6
6874 aese
$ctr4b, $rk6 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
6
6876 ldp
$rk8q, $rk9q, [$cc, #128] @ load rk8, rk9
6877 pmull
$h2.1q
, $res6.1d
, $h2.1d @ GHASH block
8k
+6 - low
6878 aese
$ctr5b, $rk7 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
7
6880 aese
$ctr1b, $rk7 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
7
6881 aese
$ctr4b, $rk7 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
7
6883 aese
$ctr6b, $rk7 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
7
6884 aese
$ctr2b, $rk7 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
7
6885 eor3
$acc_hb, $acc_hb, $t4.16b
, $t5.16b @ GHASH block
8k
+4, 8k
+5 - high
6887 aese
$ctr0b, $rk7 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
7
6888 trn2
$res6.2d
, $res7.2d
, $res6.2d @ GHASH block
8k
+6, 8k
+7 - mid
6889 aese
$ctr3b, $rk7 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
7
6891 aese
$ctr0b, $rk8 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
8
6892 aese
$ctr7b, $rk8 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
8
6893 aese
$ctr4b, $rk8 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
8
6895 aese
$ctr1b, $rk8 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
8
6896 aese
$ctr5b, $rk8 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
8
6897 aese
$ctr6b, $rk8 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
8
6899 aese
$ctr3b, $rk8 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
8
6900 aese
$ctr4b, $rk9 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
9
6901 eor
$res4.16b
, $res4.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
6903 aese
$ctr0b, $rk9 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
9
6904 aese
$ctr1b, $rk9 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
9
6905 eor
$res6.16b
, $res6.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
6907 aese
$ctr6b, $rk9 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
9
6908 aese
$ctr7b, $rk9 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
9
6909 pmull2
$t6.1q
, $res4.2d
, $h34k.2d @ GHASH block
8k
+4 - mid
6911 aese
$ctr2b, $rk8 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
8
6912 pmull
$h34k.1q
, $res4.1d
, $h34k.1d @ GHASH block
8k
+5 - mid
6913 pmull2
$t8.1q
, $res7.2d
, $h1.2d @ GHASH block
8k
+7 - high
6915 pmull2
$t9.1q
, $res6.2d
, $h12k.2d @ GHASH block
8k
+6 - mid
6916 pmull
$h12k.1q
, $res6.1d
, $h12k.1d @ GHASH block
8k
+7 - mid
6917 pmull
$h1.1q
, $res7.1d
, $h1.1d @ GHASH block
8k
+7 - low
6919 ldp
$rk10q, $rk11q, [$cc, #160] @ load rk10, rk11
6920 eor3
$acc_lb, $acc_lb, $h4.16b
, $h3.16b @ GHASH block
8k
+4, 8k
+5 - low
6921 eor3
$acc_mb, $acc_mb, $h34k.16b
, $t6.16b @ GHASH block
8k
+4, 8k
+5 - mid
6923 aese
$ctr2b, $rk9 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
9
6924 aese
$ctr3b, $rk9 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
9
6925 aese
$ctr5b, $rk9 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
9
6927 eor3
$acc_hb, $acc_hb, $t7.16b
, $t8.16b @ GHASH block
8k
+6, 8k
+7 - high
6928 eor3
$acc_lb, $acc_lb, $h2.16b
, $h1.16b @ GHASH block
8k
+6, 8k
+7 - low
6929 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
6931 eor3
$acc_mb, $acc_mb, $h12k.16b
, $t9.16b @ GHASH block
8k
+6, 8k
+7 - mid
6933 aese
$ctr4b, $rk10 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
10
6934 aese
$ctr6b, $rk10 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
10
6935 aese
$ctr5b, $rk10 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
10
6937 aese
$ctr0b, $rk10 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
10
6938 aese
$ctr2b, $rk10 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
10
6939 aese
$ctr3b, $rk10 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
10
6941 eor3
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
6943 aese
$ctr7b, $rk10 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
10
6944 aese
$ctr1b, $rk10 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
10
6945 ldp
$rk12q, $rk13q, [$cc, #192] @ load rk12, rk13
6947 ext
$t11.16b
, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
6949 aese
$ctr2b, $rk11 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
11
6950 aese
$ctr1b, $rk11 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
11
6951 aese
$ctr0b, $rk11 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
11
6953 pmull
$t12.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
6954 aese
$ctr3b, $rk11 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
11
6956 aese
$ctr7b, $rk11 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
11
6957 aese
$ctr6b, $rk11 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
11
6958 aese
$ctr4b, $rk11 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
11
6960 aese
$ctr5b, $rk11 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
11
6961 aese
$ctr3b, $rk12 \n aesmc
$ctr3b, $ctr3b @ AES block
8k
+11 - round
12
6963 eor3
$acc_mb, $acc_mb, $t12.16b
, $t11.16b @ MODULO
- fold into mid
@ From here on the round-13 steps are aese only, with no aesmc following.
6965 aese
$ctr3b, $rk13 @ AES block
8k
+11 - round
13
6966 aese
$ctr2b, $rk12 \n aesmc
$ctr2b, $ctr2b @ AES block
8k
+10 - round
12
6967 aese
$ctr6b, $rk12 \n aesmc
$ctr6b, $ctr6b @ AES block
8k
+14 - round
12
6969 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
6970 aese
$ctr4b, $rk12 \n aesmc
$ctr4b, $ctr4b @ AES block
8k
+12 - round
12
6971 aese
$ctr7b, $rk12 \n aesmc
$ctr7b, $ctr7b @ AES block
8k
+15 - round
12
6973 aese
$ctr0b, $rk12 \n aesmc
$ctr0b, $ctr0b @ AES block
8k
+8 - round
12
6974 ldr
$rk14q, [$cc, #224] @ load rk14
6975 aese
$ctr1b, $rk12 \n aesmc
$ctr1b, $ctr1b @ AES block
8k
+9 - round
12
6977 aese
$ctr4b, $rk13 @ AES block
8k
+12 - round
13
6978 ext
$t11.16b
, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
6979 aese
$ctr5b, $rk12 \n aesmc
$ctr5b, $ctr5b @ AES block
8k
+13 - round
12
6981 aese
$ctr6b, $rk13 @ AES block
8k
+14 - round
13
6982 aese
$ctr2b, $rk13 @ AES block
8k
+10 - round
13
6983 aese
$ctr1b, $rk13 @ AES block
8k
+9 - round
13
6985 aese
$ctr5b, $rk13 @ AES block
8k
+13 - round
13
6986 eor3
$acc_lb, $acc_lb, $t11.16b
, $acc_hb @ MODULO
- fold into low
6987 add
$rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s @ CTR block
8k
+15
6989 aese
$ctr7b, $rk13 @ AES block
8k
+15 - round
13
6990 aese
$ctr0b, $rk13 @ AES block
8k
+8 - round
13
@ TAIL dispatcher of the AES-256 decrypt path.
@ Computes the number of input bytes left, loads the first remaining
@ ciphertext block and the h5..h8 / h56k / h78k hash keys, forms the first
@ result in $res4, then walks a compare-and-branch ladder
@ (112, 96, 80, 64, 48, 32, 16 bytes) to enter the
@ .L256_dec_blocks_more_than_N chain at the right depth.
@ Every rung that is passed subtracts $rctr_inc from $rtmp_ctr once.
@ NOTE(review): $t1 is used below as the last operand of eor3 but is not
@ assigned anywhere in this visible span; presumably it holds the final
@ round key (rk14) - confirm against the full file.
6991 .L256_dec_tail
: @ TAIL
6993 ext
$t0.16b
, $acc_lb, $acc_lb, #8 @ prepare final partial tag
6994 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process
@ The flags from this cmp are consumed by the b.gt to
@ .L256_dec_blocks_more_than_7 further down.
6995 cmp $main_end_input_ptr, #112
6997 ldr
$res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext
6999 ldp
$h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k, h8l | h8h
7000 ext
$h8.16b
, $h8.16b
, $h8.16b
, #8
7003 ldp
$h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h, h6k | h5k
7004 ext
$h5.16b
, $h5.16b
, $h5.16b
, #8
7006 eor3
$res4b, $res1b, $ctr0b, $t1.16b @ AES block
8k
+8 - result
7007 ldp
$h6q, $h7q, [$current_tag, #160] @ load h6l | h6h, h7l | h7h
7008 ext
$h6.16b
, $h6.16b
, $h6.16b
, #8
7009 ext
$h7.16b
, $h7.16b
, $h7.16b
, #8
7010 b
.gt .L256_dec_blocks_more_than_7
7013 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7024 cmp $main_end_input_ptr, #96
7026 b
.gt .L256_dec_blocks_more_than_6
7032 cmp $main_end_input_ptr, #80
7033 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7037 b
.gt .L256_dec_blocks_more_than_5
7039 cmp $main_end_input_ptr, #64
7041 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7047 b
.gt .L256_dec_blocks_more_than_4
7049 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7051 cmp $main_end_input_ptr, #48
7055 b
.gt .L256_dec_blocks_more_than_3
@ h34k is loaded here because .L256_dec_blocks_more_than_2 uses it and,
@ when entered directly, skips the load done in .L256_dec_blocks_more_than_3.
7057 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
7058 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7061 cmp $main_end_input_ptr, #32
7063 b
.gt .L256_dec_blocks_more_than_2
7065 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7068 cmp $main_end_input_ptr, #16
7069 b
.gt .L256_dec_blocks_more_than_1
7071 sub $rtmp_ctr.4s
, $rtmp_ctr.4s
, $rctr_inc.4s
7072 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
7073 b
.L256_dec_blocks_less_than_1
@ Tail step for "more than 7 blocks left".
@ GHASHes the ciphertext block currently in $res1 (byte-reversed into
@ $res0, with the pending tag in $t0 xored in) against h8, stores the
@ previously computed result in $res4, loads the next ciphertext block and
@ forms its result from $ctr1.
@ Unlike the later steps, the three pmull/pmull2 here write
@ $acc_h/$acc_l/$acc_m directly instead of xoring into them.
@ Falls through into .L256_dec_blocks_more_than_6.
7074 .L256_dec_blocks_more_than_7
: @ blocks left
> 7
7075 rev64
$res0b, $res1b @ GHASH final
-7 block
7076 ldr
$res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext
7077 st1
{ $res4b}, [$output_ptr], #16 @ AES final-7 block - store result
@ Park the upper half of h78k in $acc_m so the mid pmull at the end of this
@ step can use it as its second operand.
7079 ins
$acc_m.d
[0], $h78k.d
[1] @ GHASH final
-7 block
- mid
7081 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
@ $rk4v is used as scratch: it receives the upper 64 bits of the block and
@ is then xored with the lower 64 bits to form the mid-term operand.
7083 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-7 block
- mid
7084 eor3
$res4b, $res1b, $ctr1b, $t1.16b @ AES final
-6 block
- result
7086 pmull2
$acc_h.1q
, $res0.2d
, $h8.2d @ GHASH final
-7 block
- high
7088 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-7 block
- mid
7089 movi
$t0.8b
, #0 @ suppress further partial tag feed in
7091 pmull
$acc_l.1q
, $res0.1d
, $h8.1d @ GHASH final
-7 block
- low
7092 pmull
$acc_m.1q
, $rk4v.1d
, $acc_m.1d @ GHASH final
-7 block
- mid
@ Tail step for "more than 6 blocks left".
@ GHASHes the block in $res1 against h7 (mid term against the low half of
@ h78k) and xors the three partial products into $acc_h/$acc_m/$acc_l.
@ Also stores the pending result in $res4, loads the next ciphertext block
@ and forms its result from $ctr2.
@ Falls through into .L256_dec_blocks_more_than_5.
7093 .L256_dec_blocks_more_than_6
: @ blocks left
> 6
7095 rev64
$res0b, $res1b @ GHASH final
-6 block
7097 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7098 ldr
$res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext
7099 movi
$t0.8b
, #0 @ suppress further partial tag feed in
7101 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-6 block
- mid
7102 st1
{ $res4b}, [$output_ptr], #16 @ AES final-6 block - store result
7103 pmull2
$rk2q1, $res0.2d
, $h7.2d @ GHASH final
-6 block
- high
7105 pmull
$rk3q1, $res0.1d
, $h7.1d @ GHASH final
-6 block
- low
7107 eor3
$res4b, $res1b, $ctr2b, $t1.16b @ AES final
-5 block
- result
7108 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-6 block
- low
7109 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-6 block
- mid
7111 pmull
$rk4v.1q
, $rk4v.1d
, $h78k.1d @ GHASH final
-6 block
- mid
7113 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-6 block
- mid
7114 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-6 block
- high
@ Tail step for "more than 5 blocks left".
@ GHASHes the block in $res1 against h6 and xors the partial products into
@ $acc_h/$acc_m/$acc_l. The mid term uses the upper half of h56k, so the
@ (high xor low) value is copied into lane 1 of $rk4v before pmull2.
@ Also stores the pending result in $res4, loads the next ciphertext block
@ and forms its result from $ctr3.
@ Falls through into .L256_dec_blocks_more_than_4.
7115 .L256_dec_blocks_more_than_5
: @ blocks left
> 5
7117 rev64
$res0b, $res1b @ GHASH final
-5 block
7119 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7121 pmull2
$rk2q1, $res0.2d
, $h6.2d @ GHASH final
-5 block
- high
7122 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-5 block
- mid
7124 ldr
$res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext
7126 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-5 block
- mid
7127 st1
{ $res4b}, [$output_ptr], #16 @ AES final-5 block - store result
7129 pmull
$rk3q1, $res0.1d
, $h6.1d @ GHASH final
-5 block
- low
7130 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-5 block
- mid
7132 pmull2
$rk4v.1q
, $rk4v.2d
, $h56k.2d @ GHASH final
-5 block
- mid
7134 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-5 block
- high
7135 eor3
$res4b, $res1b, $ctr3b, $t1.16b @ AES final
-4 block
- result
7136 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-5 block
- low
7138 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-5 block
- mid
7139 movi
$t0.8b
, #0 @ suppress further partial tag feed in
@ Tail step for "more than 4 blocks left".
@ GHASHes the block in $res1 against h5 (mid term against the low half of
@ h56k) and xors the partial products into $acc_h/$acc_m/$acc_l.
@ Also stores the pending result in $res4, loads the next ciphertext block
@ and forms its result from $ctr4.
@ Falls through into .L256_dec_blocks_more_than_3.
7140 .L256_dec_blocks_more_than_4
: @ blocks left
> 4
7142 rev64
$res0b, $res1b @ GHASH final
-4 block
7144 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7146 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-4 block
- mid
7147 ldr
$res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext
7149 movi
$t0.8b
, #0 @ suppress further partial tag feed in
7151 pmull
$rk3q1, $res0.1d
, $h5.1d @ GHASH final
-4 block
- low
7152 pmull2
$rk2q1, $res0.2d
, $h5.2d @ GHASH final
-4 block
- high
7154 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-4 block
- mid
7156 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-4 block
- high
7158 pmull
$rk4v.1q
, $rk4v.1d
, $h56k.1d @ GHASH final
-4 block
- mid
7160 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-4 block
- low
7161 st1
{ $res4b}, [$output_ptr], #16 @ AES final-4 block - store result
7163 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-4 block
- mid
7164 eor3
$res4b, $res1b, $ctr4b, $t1.16b @ AES final
-3 block
- result
@ Tail step for "more than 3 blocks left".
@ Loads h4 and h34k itself, then GHASHes the block in $res1 against h4 and
@ xors the partial products into $acc_h/$acc_m/$acc_l. The mid term uses
@ the upper half of h34k, so the (high xor low) value is copied into lane 1
@ of $rk4v before pmull2.
@ Also stores the pending result in $res4, loads the next ciphertext block
@ and forms its result from $ctr5.
@ Falls through into .L256_dec_blocks_more_than_2.
7165 .L256_dec_blocks_more_than_3
: @ blocks left
> 3
7167 ldr
$h4q, [$current_tag, #112] @ load h4l | h4h
7168 ext
$h4.16b
, $h4.16b
, $h4.16b
, #8
7169 rev64
$res0b, $res1b @ GHASH final
-3 block
7171 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7172 ldr
$res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext
7173 ldr
$h34kq, [$current_tag, #96] @ load h4k | h3k
7175 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-3 block
- mid
7176 st1
{ $res4b}, [$output_ptr], #16 @ AES final-3 block - store result
7178 eor3
$res4b, $res1b, $ctr5b, $t1.16b @ AES final
-2 block
- result
7180 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-3 block
- mid
7182 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-3 block
- mid
7183 pmull
$rk3q1, $res0.1d
, $h4.1d @ GHASH final
-3 block
- low
7184 pmull2
$rk2q1, $res0.2d
, $h4.2d @ GHASH final
-3 block
- high
7186 movi
$t0.8b
, #0 @ suppress further partial tag feed in
7187 pmull2
$rk4v.1q
, $rk4v.2d
, $h34k.2d @ GHASH final
-3 block
- mid
7188 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-3 block
- low
7190 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-3 block
- high
7192 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-3 block
- mid
7193 .L256_dec_blocks_more_than_2
: @ blocks left
> 2
7195 rev64
$res0b, $res1b @ GHASH final
-2 block
7197 ldr
$h3q, [$current_tag, #80] @ load h3l | h3h
7198 ext
$h3.16b
, $h3.16b
, $h3.16b
, #8
7199 ldr
$res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext
7201 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7203 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-2 block
- mid
7205 pmull
$rk3q1, $res0.1d
, $h3.1d @ GHASH final
-2 block
- low
7206 st1
{ $res4b}, [$output_ptr], #16 @ AES final-2 block - store result
7207 eor3
$res4b, $res1b, $ctr6b, $t1.16b @ AES final
-1 block
- result
7209 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-2 block
- mid
7210 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-2 block
- low
7211 movi
$t0.8b
, #0 @ suppress further partial tag feed in
7213 pmull
$rk4v.1q
, $rk4v.1d
, $h34k.1d @ GHASH final
-2 block
- mid
7214 pmull2
$rk2q1, $res0.2d
, $h3.2d @ GHASH final
-2 block
- high
7216 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-2 block
- mid
7217 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-2 block
- high
7218 .L256_dec_blocks_more_than_1
: @ blocks left
> 1
7220 rev64
$res0b, $res1b @ GHASH final
-1 block
7222 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7224 ins
$rk4v.d
[0], $res0.d
[1] @ GHASH final
-1 block
- mid
7225 ldr
$h2q, [$current_tag, #64] @ load h2l | h2h
7226 ext
$h2.16b
, $h2.16b
, $h2.16b
, #8
7228 eor
$rk4v.8b
, $rk4v.8b
, $res0.8b @ GHASH final
-1 block
- mid
7229 ldr
$res1q, [$input_ptr], #16 @ AES final block - load ciphertext
7230 st1
{ $res4b}, [$output_ptr], #16 @ AES final-1 block - store result
7232 ldr
$h12kq, [$current_tag, #48] @ load h2k | h1k
7233 pmull
$rk3q1, $res0.1d
, $h2.1d @ GHASH final
-1 block
- low
7235 ins
$rk4v.d
[1], $rk4v.d
[0] @ GHASH final
-1 block
- mid
7237 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final
-1 block
- low
7239 eor3
$res4b, $res1b, $ctr7b, $t1.16b @ AES final block
- result
7240 pmull2
$rk2q1, $res0.2d
, $h2.2d @ GHASH final
-1 block
- high
7242 pmull2
$rk4v.1q
, $rk4v.2d
, $h12k.2d @ GHASH final
-1 block
- mid
7244 movi
$t0.8b
, #0 @ suppress further partial tag feed in
7245 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final
-1 block
- high
7247 eor
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final
-1 block
- mid
7248 .L256_dec_blocks_less_than_1
: @ blocks left
<= 1
7250 ld1
{ $rk0}, [$output_ptr] @ load existing bytes where the possibly partial
last block is to be stored
7251 mvn
$temp0_x, xzr @ temp0_x
= 0xffffffffffffffff
7252 and $bit_length, $bit_length, #127 @ bit_length %= 128
7254 sub $bit_length, $bit_length, #128 @ bit_length -= 128
7255 rev32
$rtmp_ctr.16b
, $rtmp_ctr.16b
7256 str
$rtmp_ctrq, [$counter] @ store the updated counter
7258 neg
$bit_length, $bit_length @ bit_length
= 128 - #bits in input (in range [1,128])
7260 and $bit_length, $bit_length, #127 @ bit_length %= 128
7262 lsr
$temp0_x, $temp0_x, $bit_length @ temp0_x is mask
for top
64b of
last block
7263 cmp $bit_length, #64
7264 mvn
$temp1_x, xzr @ temp1_x
= 0xffffffffffffffff
7266 csel
$temp3_x, $temp0_x, xzr
, lt
7267 csel
$temp2_x, $temp1_x, $temp0_x, lt
7269 mov
$ctr0.d
[0], $temp2_x @ ctr0b is mask
for last block
7270 mov
$ctr0.d
[1], $temp3_x
7272 and $res1b, $res1b, $ctr0b @ possibly partial
last block has zeroes
in highest bits
7273 ldr
$h1q, [$current_tag, #32] @ load h1l | h1h
7274 ext
$h1.16b
, $h1.16b
, $h1.16b
, #8
7275 bif
$res4b, $rk0, $ctr0b @ insert existing bytes
in top end of result before storing
7277 rev64
$res0b, $res1b @ GHASH final block
7279 eor
$res0b, $res0b, $t0.16b @ feed
in partial tag
7281 ins
$t0.d
[0], $res0.d
[1] @ GHASH final block
- mid
7282 pmull2
$rk2q1, $res0.2d
, $h1.2d @ GHASH final block
- high
7284 eor
$t0.8b
, $t0.8b
, $res0.8b @ GHASH final block
- mid
7286 pmull
$rk3q1, $res0.1d
, $h1.1d @ GHASH final block
- low
7287 eor
$acc_hb, $acc_hb, $rk2 @ GHASH final block
- high
7289 pmull
$t0.1q
, $t0.1d
, $h12k.1d @ GHASH final block
- mid
7291 eor
$acc_mb, $acc_mb, $t0.16b @ GHASH final block
- mid
7292 ldr
$mod_constantd, [$modulo_constant] @ MODULO
- load modulo constant
7293 eor
$acc_lb, $acc_lb, $rk3 @ GHASH final block
- low
7295 pmull
$t11.1q
, $acc_h.1d
, $mod_constant.1d @ MODULO
- top
64b align with mid
7296 eor
$t10.16b
, $acc_hb, $acc_lb @ MODULO
- karatsuba tidy up
7298 ext
$acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment
7299 st1
{ $res4b}, [$output_ptr] @ store all
16B
7301 eor
$acc_mb, $acc_mb, $t10.16b @ MODULO
- karatsuba tidy up
7303 eor
$t11.16b
, $acc_hb, $t11.16b @ MODULO
- fold into mid
7304 eor
$acc_mb, $acc_mb, $t11.16b @ MODULO
- fold into mid
7306 pmull
$acc_h.1q
, $acc_m.1d
, $mod_constant.1d @ MODULO
- mid
64b align with low
7308 ext
$acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
7309 eor
$acc_lb, $acc_lb, $acc_hb @ MODULO
- fold into low
7311 eor
$acc_lb, $acc_lb, $acc_mb @ MODULO
- fold into low
7312 ext
$acc_lb, $acc_lb, $acc_lb, #8
7313 rev64
$acc_lb, $acc_lb
7314 st1
{ $acc_l.16b
}, [$current_tag]
7315 lsr x0
, $bit_length, #3 @ return sizes
7317 ldp d10
, d11
, [sp
, #16]
7318 ldp d12
, d13
, [sp
, #32]
7319 ldp d14
, d15
, [sp
, #48]
7320 ldp d8
, d9
, [sp
], #80
7326 .size unroll8_eor3_aes_gcm_dec_256_kernel
,.-unroll8_eor3_aes_gcm_dec_256_kernel
7332 .asciz
"AES GCM module for ARMv8, SPDX BSD-3-Clause by <xiaokang.qian\@arm.com>"
7339 "rax1" => 0xce608c00, "eor3" => 0xce000000,
7340 "bcax" => 0xce200000, "xar" => 0xce800000 );
7343 my ($mnemonic,$arg)=@_;
7345 $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv#]([0-9\-]+))?)?/
7347 sprintf ".inst\t0x%08x\t//%s %s",
7348 $opcode{$mnemonic}|$1|($2<<5)|($3<<16)|(eval($4)<<10),
7354 $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
7355 sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?
$1:$1+8,($2 eq "lo")?
0:1,
7356 $3<8?
$3:$3+8,($4 eq "lo")?
0:1;
7359 foreach(split("\n",$code)) {
7360 s/@\s/\/\
//o; # old->new style commentary
7361 s/\`([^\`]*)\`/eval($1)/ge;
7363 m/\bld1r\b/ and s/\.16b/.2d/g or
7364 s/\b(eor3|rax1|xar|bcax)\s+(v.*)/unsha3($1,$2)/ge;
# Close STDOUT explicitly to enforce a flush; abort with the system error
# message if the close fails.
close(STDOUT) || die "error closing STDOUT: $!";