]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sha/sha512.c
Reorganize private crypto header files
[thirdparty/openssl.git] / crypto / sha / sha512.c
1 /*
2 * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 #include <openssl/opensslconf.h>
11 /*-
12 * IMPLEMENTATION NOTES.
13 *
14 * As you might have noticed 32-bit hash algorithms:
15 *
16 * - permit SHA_LONG to be wider than 32-bit
17 * - optimized versions implement two transform functions: one operating
18 * on [aligned] data in host byte order and one - on data in input
19 * stream byte order;
20 * - share common byte-order neutral collector and padding function
21 * implementations, ../md32_common.h;
22 *
23 * Neither of the above applies to this SHA-512 implementation. Reasons
24 * [in reverse order] are:
25 *
26 * - it's the only 64-bit hash algorithm at the time of this writing,
27 * there is no need for common collector/padding implementation [yet];
28 * - by supporting only one transform function [which operates on
29 * *aligned* data in input stream byte order, big-endian in this case]
30 * we minimize burden of maintenance in two ways: a) collector/padding
31 * function is simpler; b) only one transform function to stare at;
32 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33 * apply a number of optimizations to mitigate potential performance
34 * penalties caused by previous design decision;
35 *
36 * Caveat lector.
37 *
38 * Implementation relies on the fact that "long long" is 64-bit on
39 * both 32- and 64-bit platforms. If some compiler vendor comes up
40 * with 128-bit long long, adjustment to sha.h would be required.
41 * As this implementation relies on 64-bit integer type, it's totally
42 * inappropriate for platforms which don't support it, most notably
43 * 16-bit platforms.
44 */
45 #include <stdlib.h>
46 #include <string.h>
47
48 #include <openssl/crypto.h>
49 #include <openssl/sha.h>
50 #include <openssl/opensslv.h>
51
52 #include "internal/cryptlib.h"
53 #include "crypto/sha.h"
54
/*
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined when compiling for x86,
 * x86-64, s390/s390x or AArch64, and whenever SHA512_ASM is defined.
 *
 * When it is NOT defined, SHA512_Update() and SHA512_Transform() in this
 * file copy any input that is not aligned to sizeof(c->u.d[0]) into the
 * context buffer before passing it to sha512_block_data_order().
 */
55 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56 defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57 defined(__s390__) || defined(__s390x__) || \
58 defined(__aarch64__) || \
59 defined(SHA512_ASM)
60 # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
61 #endif
62
63 int sha512_224_init(SHA512_CTX *c)
64 {
65 c->h[0] = U64(0x8c3d37c819544da2);
66 c->h[1] = U64(0x73e1996689dcd4d6);
67 c->h[2] = U64(0x1dfab7ae32ff9c82);
68 c->h[3] = U64(0x679dd514582f9fcf);
69 c->h[4] = U64(0x0f6d2b697bd44da8);
70 c->h[5] = U64(0x77e36f7304c48942);
71 c->h[6] = U64(0x3f9d85a86a1d36c8);
72 c->h[7] = U64(0x1112e6ad91d692a1);
73
74 c->Nl = 0;
75 c->Nh = 0;
76 c->num = 0;
77 c->md_len = SHA224_DIGEST_LENGTH;
78 return 1;
79 }
80
81 int sha512_256_init(SHA512_CTX *c)
82 {
83 c->h[0] = U64(0x22312194fc2bf72c);
84 c->h[1] = U64(0x9f555fa3c84c64c2);
85 c->h[2] = U64(0x2393b86b6f53b151);
86 c->h[3] = U64(0x963877195940eabd);
87 c->h[4] = U64(0x96283ee2a88effe3);
88 c->h[5] = U64(0xbe5e1e2553863992);
89 c->h[6] = U64(0x2b0199fc2c85b8aa);
90 c->h[7] = U64(0x0eb72ddc81c52ca2);
91
92 c->Nl = 0;
93 c->Nh = 0;
94 c->num = 0;
95 c->md_len = SHA256_DIGEST_LENGTH;
96 return 1;
97 }
98
99 int SHA384_Init(SHA512_CTX *c)
100 {
101 c->h[0] = U64(0xcbbb9d5dc1059ed8);
102 c->h[1] = U64(0x629a292a367cd507);
103 c->h[2] = U64(0x9159015a3070dd17);
104 c->h[3] = U64(0x152fecd8f70e5939);
105 c->h[4] = U64(0x67332667ffc00b31);
106 c->h[5] = U64(0x8eb44a8768581511);
107 c->h[6] = U64(0xdb0c2e0d64f98fa7);
108 c->h[7] = U64(0x47b5481dbefa4fa4);
109
110 c->Nl = 0;
111 c->Nh = 0;
112 c->num = 0;
113 c->md_len = SHA384_DIGEST_LENGTH;
114 return 1;
115 }
116
117 int SHA512_Init(SHA512_CTX *c)
118 {
119 c->h[0] = U64(0x6a09e667f3bcc908);
120 c->h[1] = U64(0xbb67ae8584caa73b);
121 c->h[2] = U64(0x3c6ef372fe94f82b);
122 c->h[3] = U64(0xa54ff53a5f1d36f1);
123 c->h[4] = U64(0x510e527fade682d1);
124 c->h[5] = U64(0x9b05688c2b3e6c1f);
125 c->h[6] = U64(0x1f83d9abfb41bd6b);
126 c->h[7] = U64(0x5be0cd19137e2179);
127
128 c->Nl = 0;
129 c->Nh = 0;
130 c->num = 0;
131 c->md_len = SHA512_DIGEST_LENGTH;
132 return 1;
133 }
134
/*
 * Block (compression) function: consumes |num| consecutive input blocks
 * starting at |in| and folds each of them into ctx->h[].
 *
 * The function has internal linkage unless SHA512_ASM is defined.  With
 * SHA512_ASM defined the C implementations further down are compiled out,
 * so the definition has to come from another object file (presumably an
 * assembly module - confirm against the build configuration).
 */
135 #ifndef SHA512_ASM
136 static
137 #endif
138 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
139
140 int SHA512_Final(unsigned char *md, SHA512_CTX *c)
141 {
142 unsigned char *p = (unsigned char *)c->u.p;
143 size_t n = c->num;
144
145 p[n] = 0x80; /* There always is a room for one */
146 n++;
147 if (n > (sizeof(c->u) - 16)) {
148 memset(p + n, 0, sizeof(c->u) - n);
149 n = 0;
150 sha512_block_data_order(c, p, 1);
151 }
152
153 memset(p + n, 0, sizeof(c->u) - 16 - n);
154 #ifdef B_ENDIAN
155 c->u.d[SHA_LBLOCK - 2] = c->Nh;
156 c->u.d[SHA_LBLOCK - 1] = c->Nl;
157 #else
158 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
159 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
160 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
161 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
162 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
163 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
164 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
165 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
166 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
167 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
168 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
169 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
170 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
171 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
172 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
173 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
174 #endif
175
176 sha512_block_data_order(c, p, 1);
177
178 if (md == 0)
179 return 0;
180
181 switch (c->md_len) {
182 /* Let compiler decide if it's appropriate to unroll... */
183 case SHA224_DIGEST_LENGTH:
184 for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
185 SHA_LONG64 t = c->h[n];
186
187 *(md++) = (unsigned char)(t >> 56);
188 *(md++) = (unsigned char)(t >> 48);
189 *(md++) = (unsigned char)(t >> 40);
190 *(md++) = (unsigned char)(t >> 32);
191 *(md++) = (unsigned char)(t >> 24);
192 *(md++) = (unsigned char)(t >> 16);
193 *(md++) = (unsigned char)(t >> 8);
194 *(md++) = (unsigned char)(t);
195 }
196 /*
197 * For 224 bits, there are four bytes left over that have to be
198 * processed separately.
199 */
200 {
201 SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
202
203 *(md++) = (unsigned char)(t >> 56);
204 *(md++) = (unsigned char)(t >> 48);
205 *(md++) = (unsigned char)(t >> 40);
206 *(md++) = (unsigned char)(t >> 32);
207 }
208 break;
209 case SHA256_DIGEST_LENGTH:
210 for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
211 SHA_LONG64 t = c->h[n];
212
213 *(md++) = (unsigned char)(t >> 56);
214 *(md++) = (unsigned char)(t >> 48);
215 *(md++) = (unsigned char)(t >> 40);
216 *(md++) = (unsigned char)(t >> 32);
217 *(md++) = (unsigned char)(t >> 24);
218 *(md++) = (unsigned char)(t >> 16);
219 *(md++) = (unsigned char)(t >> 8);
220 *(md++) = (unsigned char)(t);
221 }
222 break;
223 case SHA384_DIGEST_LENGTH:
224 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
225 SHA_LONG64 t = c->h[n];
226
227 *(md++) = (unsigned char)(t >> 56);
228 *(md++) = (unsigned char)(t >> 48);
229 *(md++) = (unsigned char)(t >> 40);
230 *(md++) = (unsigned char)(t >> 32);
231 *(md++) = (unsigned char)(t >> 24);
232 *(md++) = (unsigned char)(t >> 16);
233 *(md++) = (unsigned char)(t >> 8);
234 *(md++) = (unsigned char)(t);
235 }
236 break;
237 case SHA512_DIGEST_LENGTH:
238 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
239 SHA_LONG64 t = c->h[n];
240
241 *(md++) = (unsigned char)(t >> 56);
242 *(md++) = (unsigned char)(t >> 48);
243 *(md++) = (unsigned char)(t >> 40);
244 *(md++) = (unsigned char)(t >> 32);
245 *(md++) = (unsigned char)(t >> 24);
246 *(md++) = (unsigned char)(t >> 16);
247 *(md++) = (unsigned char)(t >> 8);
248 *(md++) = (unsigned char)(t);
249 }
250 break;
251 /* ... as well as make sure md_len is not abused. */
252 default:
253 return 0;
254 }
255
256 return 1;
257 }
258
259 int SHA384_Final(unsigned char *md, SHA512_CTX *c)
260 {
261 return SHA512_Final(md, c);
262 }
263
264 int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
265 {
266 SHA_LONG64 l;
267 unsigned char *p = c->u.p;
268 const unsigned char *data = (const unsigned char *)_data;
269
270 if (len == 0)
271 return 1;
272
273 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
274 if (l < c->Nl)
275 c->Nh++;
276 if (sizeof(len) >= 8)
277 c->Nh += (((SHA_LONG64) len) >> 61);
278 c->Nl = l;
279
280 if (c->num != 0) {
281 size_t n = sizeof(c->u) - c->num;
282
283 if (len < n) {
284 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
285 return 1;
286 } else {
287 memcpy(p + c->num, data, n), c->num = 0;
288 len -= n, data += n;
289 sha512_block_data_order(c, p, 1);
290 }
291 }
292
293 if (len >= sizeof(c->u)) {
294 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
295 if ((size_t)data % sizeof(c->u.d[0]) != 0)
296 while (len >= sizeof(c->u))
297 memcpy(p, data, sizeof(c->u)),
298 sha512_block_data_order(c, p, 1),
299 len -= sizeof(c->u), data += sizeof(c->u);
300 else
301 #endif
302 sha512_block_data_order(c, data, len / sizeof(c->u)),
303 data += len, len %= sizeof(c->u), data -= len;
304 }
305
306 if (len != 0)
307 memcpy(p, data, len), c->num = (int)len;
308
309 return 1;
310 }
311
312 int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
313 {
314 return SHA512_Update(c, data, len);
315 }
316
317 void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
318 {
319 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
320 if ((size_t)data % sizeof(c->u.d[0]) != 0)
321 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
322 #endif
323 sha512_block_data_order(c, data, 1);
324 }
325
326 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
327 {
328 SHA512_CTX c;
329 static unsigned char m[SHA384_DIGEST_LENGTH];
330
331 if (md == NULL)
332 md = m;
333 SHA384_Init(&c);
334 SHA512_Update(&c, d, n);
335 SHA512_Final(md, &c);
336 OPENSSL_cleanse(&c, sizeof(c));
337 return md;
338 }
339
340 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
341 {
342 SHA512_CTX c;
343 static unsigned char m[SHA512_DIGEST_LENGTH];
344
345 if (md == NULL)
346 md = m;
347 SHA512_Init(&c);
348 SHA512_Update(&c, d, n);
349 SHA512_Final(md, &c);
350 OPENSSL_cleanse(&c, sizeof(c));
351 return md;
352 }
353
354 #ifndef SHA512_ASM
/*
 * Per-round additive constants.  The C block functions below add K512[i]
 * into the working value of round i, for i = 0..79.
 */
355 static const SHA_LONG64 K512[80] = {
356 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
357 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
358 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
359 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
360 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
361 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
362 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
363 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
364 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
365 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
366 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
367 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
368 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
369 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
370 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
371 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
372 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
373 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
374 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
375 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
376 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
377 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
378 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
379 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
380 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
381 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
382 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
383 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
384 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
385 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
386 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
387 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
388 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
389 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
390 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
391 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
392 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
393 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
394 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
395 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
396 };
397
/*
 * Compiler- and CPU-specific definitions of two primitives, skipped
 * entirely when PEDANTIC is defined:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits
 *   PULL64(x)  - load the 64-bit word stored at x in big-endian byte order
 *
 * GCC-compatible compilers (inline asm not disabled):
 *   - x86-64:  ROTR via "rorq"; PULL64 via "bswapq" unless B_ENDIAN.
 *   - i386 (not B_ENDIAN): PULL64 only, byte-swapping the two 32-bit halves
 *     with "bswapl", or with xchgb/roll sequences when I386_ONLY is defined.
 *   - 64-bit PowerPC: ROTR via "rotrdi".
 *   - AArch64: ROTR via "ror"; PULL64 via "rev" when the compiler reports
 *     little-endian byte order.
 * MSVC:
 *   - _WIN64: ROTR via the _rotr64 intrinsic.
 *   - _M_IX86 (inline asm not disabled): PULL64 via the __pull64be() helper,
 *     which has no return statement and leaves its result in edx:eax
 *     (presumably relying on the compiler's 64-bit return convention -
 *     confirm against the MSVC documentation).
 *
 * Whatever is still undefined after this section gets a portable C
 * definition right below it.
 */
398 # ifndef PEDANTIC
399 # if defined(__GNUC__) && __GNUC__>=2 && \
400 !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
401 # if defined(__x86_64) || defined(__x86_64__)
402 # define ROTR(a,n) ({ SHA_LONG64 ret; \
403 asm ("rorq %1,%0" \
404 : "=r"(ret) \
405 : "J"(n),"0"(a) \
406 : "cc"); ret; })
407 # if !defined(B_ENDIAN)
408 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
409 asm ("bswapq %0" \
410 : "=r"(ret) \
411 : "0"(ret)); ret; })
412 # endif
413 # elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
414 # if defined(I386_ONLY)
415 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
416 unsigned int hi=p[0],lo=p[1]; \
417 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
418 "roll $16,%%eax; roll $16,%%edx; "\
419 "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
420 : "=a"(lo),"=d"(hi) \
421 : "0"(lo),"1"(hi) : "cc"); \
422 ((SHA_LONG64)hi)<<32|lo; })
423 # else
424 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
425 unsigned int hi=p[0],lo=p[1]; \
426 asm ("bswapl %0; bswapl %1;" \
427 : "=r"(lo),"=r"(hi) \
428 : "0"(lo),"1"(hi)); \
429 ((SHA_LONG64)hi)<<32|lo; })
430 # endif
431 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
432 # define ROTR(a,n) ({ SHA_LONG64 ret; \
433 asm ("rotrdi %0,%1,%2" \
434 : "=r"(ret) \
435 : "r"(a),"K"(n)); ret; })
436 # elif defined(__aarch64__)
437 # define ROTR(a,n) ({ SHA_LONG64 ret; \
438 asm ("ror %0,%1,%2" \
439 : "=r"(ret) \
440 : "r"(a),"I"(n)); ret; })
441 # if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
442 __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
443 # define PULL64(x) ({ SHA_LONG64 ret; \
444 asm ("rev %0,%1" \
445 : "=r"(ret) \
446 : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
447 # endif
448 # endif
449 # elif defined(_MSC_VER)
450 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
451 # pragma intrinsic(_rotr64)
452 # define ROTR(a,n) _rotr64((a),n)
453 # endif
454 # if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
455 !defined(OPENSSL_NO_INLINE_ASM)
456 # if defined(I386_ONLY)
457 static SHA_LONG64 __fastcall __pull64be(const void *x)
458 {
459 _asm mov edx,[ecx + 0]
460 _asm mov eax,[ecx + 4]
461 _asm xchg dh, dl
462 _asm xchg ah, al
463 _asm rol edx, 16
464 _asm rol eax, 16
465 _asm xchg dh, dl
466 _asm xchg ah, al
467 }
468 # else
469 static SHA_LONG64 __fastcall __pull64be(const void *x)
470 {
471 _asm mov edx,[ecx + 0]
472 _asm mov eax,[ecx + 4]
473 _asm bswap edx
474 _asm bswap eax
475 }
476 # endif
477 # define PULL64(x) __pull64be(&(x))
478 # endif
479 # endif
480 # endif
/*
 * Portable definitions of the primitives used by the C block functions.
 *
 * PULL64(x) assembles a 64-bit value from the eight bytes at &x, most
 * significant byte first; B(x,j) is its helper and yields byte j shifted
 * into position.  ROTR(x,s) rotates a 64-bit value right by s bits, for
 * 0 < s < 64.  Both are only defined here when no compiler-specific version
 * was selected earlier.
 *
 * Every macro parameter is parenthesized in the expansion so that compound
 * arguments such as ROTR(v, a + b) or PULL64(*p) expand as intended.
 * Arguments are evaluated more than once and must be free of side effects.
 *
 * Sigma0/Sigma1/sigma0/sigma1, Ch and Maj are the rotate/shift/boolean
 * combinations used by the round code of the block functions.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
494
495 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
496 /*
497 * This code should give better results on 32-bit CPU with less than
498 * ~24 registers, both size and performance wise...
499 */
500
/*
 * Block function variant compiled for 32-bit x86 (see the enclosing #if).
 * Processes |num| blocks of SHA_LBLOCK 64-bit words from |in| and adds the
 * result of each into ctx->h[0..7].
 *
 * Only A, E and the scratch value T live in named variables.  The other
 * six working values are kept in X[] and reached through the sliding
 * pointer F: each round stores the current A and E at F[0] and F[4] and
 * then decrements F, so in the next round the previous A is F[1], the one
 * before it F[2], and so on (F[5..7] likewise hold earlier values of E).
 * The schedule word of each round is stored at F[8], so the word from k
 * rounds ago is found at F[8 + k].
 */
501 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
502 size_t num)
503 {
504 const SHA_LONG64 *W = in;
505 SHA_LONG64 A, E, T;
506 SHA_LONG64 X[9 + 80], *F;
507 int i;
508
509 while (num--) {
510
/* Start at the top of X[]; F moves down one slot per round. */
511 F = X + 80;
512 A = ctx->h[0];
513 F[1] = ctx->h[1];
514 F[2] = ctx->h[2];
515 F[3] = ctx->h[3];
516 E = ctx->h[4];
517 F[5] = ctx->h[5];
518 F[6] = ctx->h[6];
519 F[7] = ctx->h[7];
520
/* Rounds 0..15: schedule words are read straight from the input block. */
521 for (i = 0; i < 16; i++, F--) {
522 # ifdef B_ENDIAN
523 T = W[i];
524 # else
525 T = PULL64(W[i]);
526 # endif
527 F[0] = A;
528 F[4] = E;
529 F[8] = T;
530 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
531 E = F[3] + T;
532 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
533 }
534
/*
 * Rounds 16..79: the schedule word is derived from the words stored 15, 2,
 * 16 and 7 rounds earlier (F[8 + 15], F[8 + 2], F[8 + 16], F[8 + 7]).
 */
535 for (; i < 80; i++, F--) {
536 T = sigma0(F[8 + 16 - 1]);
537 T += sigma1(F[8 + 16 - 14]);
538 T += F[8 + 16] + F[8 + 16 - 9];
539
540 F[0] = A;
541 F[4] = E;
542 F[8] = T;
543 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
544 E = F[3] + T;
545 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
546 }
547
/* Fold the working values back into the chaining state. */
548 ctx->h[0] += A;
549 ctx->h[1] += F[1];
550 ctx->h[2] += F[2];
551 ctx->h[3] += F[3];
552 ctx->h[4] += E;
553 ctx->h[5] += F[5];
554 ctx->h[6] += F[6];
555 ctx->h[7] += F[7];
556
/* Advance to the next input block. */
557 W += SHA_LBLOCK;
558 }
559 }
560
561 # elif defined(OPENSSL_SMALL_FOOTPRINT)
562
563 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
564 size_t num)
565 {
566 const SHA_LONG64 *W = in;
567 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
568 SHA_LONG64 X[16];
569 int i;
570
571 while (num--) {
572
573 a = ctx->h[0];
574 b = ctx->h[1];
575 c = ctx->h[2];
576 d = ctx->h[3];
577 e = ctx->h[4];
578 f = ctx->h[5];
579 g = ctx->h[6];
580 h = ctx->h[7];
581
582 for (i = 0; i < 16; i++) {
583 # ifdef B_ENDIAN
584 T1 = X[i] = W[i];
585 # else
586 T1 = X[i] = PULL64(W[i]);
587 # endif
588 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
589 T2 = Sigma0(a) + Maj(a, b, c);
590 h = g;
591 g = f;
592 f = e;
593 e = d + T1;
594 d = c;
595 c = b;
596 b = a;
597 a = T1 + T2;
598 }
599
600 for (; i < 80; i++) {
601 s0 = X[(i + 1) & 0x0f];
602 s0 = sigma0(s0);
603 s1 = X[(i + 14) & 0x0f];
604 s1 = sigma1(s1);
605
606 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
607 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
608 T2 = Sigma0(a) + Maj(a, b, c);
609 h = g;
610 g = f;
611 f = e;
612 e = d + T1;
613 d = c;
614 c = b;
615 b = a;
616 a = T1 + T2;
617 }
618
619 ctx->h[0] += a;
620 ctx->h[1] += b;
621 ctx->h[2] += c;
622 ctx->h[3] += d;
623 ctx->h[4] += e;
624 ctx->h[5] += f;
625 ctx->h[6] += g;
626 ctx->h[7] += h;
627
628 W += SHA_LBLOCK;
629 }
630 }
631
632 # else
633 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
634 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
635 h = Sigma0(a) + Maj(a,b,c); \
636 d += T1; h += T1; } while (0)
637
638 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
639 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
640 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
641 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
642 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
643
644 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
645 size_t num)
646 {
647 const SHA_LONG64 *W = in;
648 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
649 SHA_LONG64 X[16];
650 int i;
651
652 while (num--) {
653
654 a = ctx->h[0];
655 b = ctx->h[1];
656 c = ctx->h[2];
657 d = ctx->h[3];
658 e = ctx->h[4];
659 f = ctx->h[5];
660 g = ctx->h[6];
661 h = ctx->h[7];
662
663 # ifdef B_ENDIAN
664 T1 = X[0] = W[0];
665 ROUND_00_15(0, a, b, c, d, e, f, g, h);
666 T1 = X[1] = W[1];
667 ROUND_00_15(1, h, a, b, c, d, e, f, g);
668 T1 = X[2] = W[2];
669 ROUND_00_15(2, g, h, a, b, c, d, e, f);
670 T1 = X[3] = W[3];
671 ROUND_00_15(3, f, g, h, a, b, c, d, e);
672 T1 = X[4] = W[4];
673 ROUND_00_15(4, e, f, g, h, a, b, c, d);
674 T1 = X[5] = W[5];
675 ROUND_00_15(5, d, e, f, g, h, a, b, c);
676 T1 = X[6] = W[6];
677 ROUND_00_15(6, c, d, e, f, g, h, a, b);
678 T1 = X[7] = W[7];
679 ROUND_00_15(7, b, c, d, e, f, g, h, a);
680 T1 = X[8] = W[8];
681 ROUND_00_15(8, a, b, c, d, e, f, g, h);
682 T1 = X[9] = W[9];
683 ROUND_00_15(9, h, a, b, c, d, e, f, g);
684 T1 = X[10] = W[10];
685 ROUND_00_15(10, g, h, a, b, c, d, e, f);
686 T1 = X[11] = W[11];
687 ROUND_00_15(11, f, g, h, a, b, c, d, e);
688 T1 = X[12] = W[12];
689 ROUND_00_15(12, e, f, g, h, a, b, c, d);
690 T1 = X[13] = W[13];
691 ROUND_00_15(13, d, e, f, g, h, a, b, c);
692 T1 = X[14] = W[14];
693 ROUND_00_15(14, c, d, e, f, g, h, a, b);
694 T1 = X[15] = W[15];
695 ROUND_00_15(15, b, c, d, e, f, g, h, a);
696 # else
697 T1 = X[0] = PULL64(W[0]);
698 ROUND_00_15(0, a, b, c, d, e, f, g, h);
699 T1 = X[1] = PULL64(W[1]);
700 ROUND_00_15(1, h, a, b, c, d, e, f, g);
701 T1 = X[2] = PULL64(W[2]);
702 ROUND_00_15(2, g, h, a, b, c, d, e, f);
703 T1 = X[3] = PULL64(W[3]);
704 ROUND_00_15(3, f, g, h, a, b, c, d, e);
705 T1 = X[4] = PULL64(W[4]);
706 ROUND_00_15(4, e, f, g, h, a, b, c, d);
707 T1 = X[5] = PULL64(W[5]);
708 ROUND_00_15(5, d, e, f, g, h, a, b, c);
709 T1 = X[6] = PULL64(W[6]);
710 ROUND_00_15(6, c, d, e, f, g, h, a, b);
711 T1 = X[7] = PULL64(W[7]);
712 ROUND_00_15(7, b, c, d, e, f, g, h, a);
713 T1 = X[8] = PULL64(W[8]);
714 ROUND_00_15(8, a, b, c, d, e, f, g, h);
715 T1 = X[9] = PULL64(W[9]);
716 ROUND_00_15(9, h, a, b, c, d, e, f, g);
717 T1 = X[10] = PULL64(W[10]);
718 ROUND_00_15(10, g, h, a, b, c, d, e, f);
719 T1 = X[11] = PULL64(W[11]);
720 ROUND_00_15(11, f, g, h, a, b, c, d, e);
721 T1 = X[12] = PULL64(W[12]);
722 ROUND_00_15(12, e, f, g, h, a, b, c, d);
723 T1 = X[13] = PULL64(W[13]);
724 ROUND_00_15(13, d, e, f, g, h, a, b, c);
725 T1 = X[14] = PULL64(W[14]);
726 ROUND_00_15(14, c, d, e, f, g, h, a, b);
727 T1 = X[15] = PULL64(W[15]);
728 ROUND_00_15(15, b, c, d, e, f, g, h, a);
729 # endif
730
731 for (i = 16; i < 80; i += 16) {
732 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
733 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
734 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
735 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
736 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
737 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
738 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
739 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
740 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
741 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
742 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
743 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
744 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
745 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
746 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
747 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
748 }
749
750 ctx->h[0] += a;
751 ctx->h[1] += b;
752 ctx->h[2] += c;
753 ctx->h[3] += d;
754 ctx->h[4] += e;
755 ctx->h[5] += f;
756 ctx->h[6] += g;
757 ctx->h[7] += h;
758
759 W += SHA_LBLOCK;
760 }
761 }
762
763 # endif
764
765 #endif /* SHA512_ASM */