]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/sha512.c
Remove /* foo.c */ comments
[thirdparty/openssl.git] / crypto / sha / sha512.c
CommitLineData
c842261b 1/* ====================================================================
63077bd4
AP
2 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
3 * according to the OpenSSL license [found in ../../LICENSE].
c842261b
AP
4 * ====================================================================
5 */
165fca51 6#include <openssl/opensslconf.h>
1d97c843 7/*-
c842261b
AP
8 * IMPLEMENTATION NOTES.
9 *
10 * As you might have noticed 32-bit hash algorithms:
11 *
fcf64ba0 12 * - permit SHA_LONG to be wider than 32-bit
c842261b
AP
13 * - optimized versions implement two transform functions: one operating
14 * on [aligned] data in host byte order and one - on data in input
15 * stream byte order;
16 * - share common byte-order neutral collector and padding function
17 * implementations, ../md32_common.h;
18 *
19 * Neither of the above applies to this SHA-512 implementation. Reasons
20 * [in reverse order] are:
21 *
22 * - it's the only 64-bit hash algorithm at the time of this writing,
23 * there is no need for common collector/padding implementation [yet];
24 * - by supporting only one transform function [which operates on
25 * *aligned* data in input stream byte order, big-endian in this case]
26 * we minimize burden of maintenance in two ways: a) collector/padding
27 * function is simpler; b) only one transform function to stare at;
28 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
29 * apply a number of optimizations to mitigate potential performance
30 * penalties caused by previous design decision;
31 *
32 * Caveat lector.
33 *
34 * Implementation relies on the fact that "long long" is 64-bit on
35 * both 32- and 64-bit platforms. If some compiler vendor comes up
36 * with 128-bit long long, adjustment to sha.h would be required.
37 * As this implementation relies on 64-bit integer type, it's totally
38 * inappropriate for platforms which don't support it, most notably
39 * 16-bit platforms.
0f113f3e 40 * <appro@fy.chalmers.se>
c842261b 41 */
474e469b
RS
42#include <stdlib.h>
43#include <string.h>
c842261b 44
474e469b
RS
45#include <openssl/crypto.h>
46#include <openssl/sha.h>
47#include <openssl/opensslv.h>
c842261b 48
b39fc560 49#include "internal/cryptlib.h"
2b247cf8 50
/*
 * When this macro is defined, SHA512_Update() and SHA512_Transform() hand
 * caller data straight to sha512_block_data_order() regardless of its
 * alignment; otherwise misaligned input is first copied into the context
 * buffer.  It is enabled for the x86, x86-64, s390 and AArch64 families and
 * whenever SHA512_ASM is defined.
 */
#if defined(SHA512_ASM) \
    || defined(__i386) || defined(__i386__) || defined(_M_IX86) \
    || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) \
    || defined(__s390__) || defined(__s390x__) \
    || defined(__aarch64__)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
c842261b 58
0f113f3e
MC
59int SHA384_Init(SHA512_CTX *c)
60{
61 c->h[0] = U64(0xcbbb9d5dc1059ed8);
62 c->h[1] = U64(0x629a292a367cd507);
63 c->h[2] = U64(0x9159015a3070dd17);
64 c->h[3] = U64(0x152fecd8f70e5939);
65 c->h[4] = U64(0x67332667ffc00b31);
66 c->h[5] = U64(0x8eb44a8768581511);
67 c->h[6] = U64(0xdb0c2e0d64f98fa7);
68 c->h[7] = U64(0x47b5481dbefa4fa4);
69
70 c->Nl = 0;
71 c->Nh = 0;
72 c->num = 0;
73 c->md_len = SHA384_DIGEST_LENGTH;
74 return 1;
75}
76
77int SHA512_Init(SHA512_CTX *c)
78{
79 c->h[0] = U64(0x6a09e667f3bcc908);
80 c->h[1] = U64(0xbb67ae8584caa73b);
81 c->h[2] = U64(0x3c6ef372fe94f82b);
82 c->h[3] = U64(0xa54ff53a5f1d36f1);
83 c->h[4] = U64(0x510e527fade682d1);
84 c->h[5] = U64(0x9b05688c2b3e6c1f);
85 c->h[6] = U64(0x1f83d9abfb41bd6b);
86 c->h[7] = U64(0x5be0cd19137e2179);
87
88 c->Nl = 0;
89 c->Nh = 0;
90 c->num = 0;
91 c->md_len = SHA512_DIGEST_LENGTH;
92 return 1;
93}
94
474e469b 95#ifndef SHA512_ASM
859ceeeb 96static
474e469b 97#endif
0f113f3e
MC
98void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
99
100int SHA512_Final(unsigned char *md, SHA512_CTX *c)
101{
102 unsigned char *p = (unsigned char *)c->u.p;
103 size_t n = c->num;
104
105 p[n] = 0x80; /* There always is a room for one */
106 n++;
16f8d4eb
RS
107 if (n > (sizeof(c->u) - 16)) {
108 memset(p + n, 0, sizeof(c->u) - n);
109 n = 0;
110 sha512_block_data_order(c, p, 1);
111 }
0f113f3e
MC
112
113 memset(p + n, 0, sizeof(c->u) - 16 - n);
474e469b 114#ifdef B_ENDIAN
0f113f3e
MC
115 c->u.d[SHA_LBLOCK - 2] = c->Nh;
116 c->u.d[SHA_LBLOCK - 1] = c->Nl;
474e469b 117#else
0f113f3e
MC
118 p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
119 p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
120 p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
121 p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
122 p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
123 p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
124 p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
125 p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
126 p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
127 p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
128 p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
129 p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
130 p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
131 p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
132 p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
133 p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
474e469b 134#endif
0f113f3e
MC
135
136 sha512_block_data_order(c, p, 1);
137
138 if (md == 0)
139 return 0;
140
141 switch (c->md_len) {
142 /* Let compiler decide if it's appropriate to unroll... */
143 case SHA384_DIGEST_LENGTH:
144 for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
145 SHA_LONG64 t = c->h[n];
146
147 *(md++) = (unsigned char)(t >> 56);
148 *(md++) = (unsigned char)(t >> 48);
149 *(md++) = (unsigned char)(t >> 40);
150 *(md++) = (unsigned char)(t >> 32);
151 *(md++) = (unsigned char)(t >> 24);
152 *(md++) = (unsigned char)(t >> 16);
153 *(md++) = (unsigned char)(t >> 8);
154 *(md++) = (unsigned char)(t);
155 }
156 break;
157 case SHA512_DIGEST_LENGTH:
158 for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
159 SHA_LONG64 t = c->h[n];
160
161 *(md++) = (unsigned char)(t >> 56);
162 *(md++) = (unsigned char)(t >> 48);
163 *(md++) = (unsigned char)(t >> 40);
164 *(md++) = (unsigned char)(t >> 32);
165 *(md++) = (unsigned char)(t >> 24);
166 *(md++) = (unsigned char)(t >> 16);
167 *(md++) = (unsigned char)(t >> 8);
168 *(md++) = (unsigned char)(t);
169 }
170 break;
171 /* ... as well as make sure md_len is not abused. */
172 default:
173 return 0;
174 }
175
176 return 1;
177}
178
179int SHA384_Final(unsigned char *md, SHA512_CTX *c)
180{
181 return SHA512_Final(md, c);
182}
183
184int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
185{
186 SHA_LONG64 l;
187 unsigned char *p = c->u.p;
188 const unsigned char *data = (const unsigned char *)_data;
189
190 if (len == 0)
191 return 1;
192
193 l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
194 if (l < c->Nl)
195 c->Nh++;
196 if (sizeof(len) >= 8)
197 c->Nh += (((SHA_LONG64) len) >> 61);
198 c->Nl = l;
199
200 if (c->num != 0) {
201 size_t n = sizeof(c->u) - c->num;
202
203 if (len < n) {
204 memcpy(p + c->num, data, len), c->num += (unsigned int)len;
205 return 1;
206 } else {
207 memcpy(p + c->num, data, n), c->num = 0;
208 len -= n, data += n;
209 sha512_block_data_order(c, p, 1);
210 }
211 }
212
213 if (len >= sizeof(c->u)) {
474e469b 214#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
0f113f3e
MC
215 if ((size_t)data % sizeof(c->u.d[0]) != 0)
216 while (len >= sizeof(c->u))
217 memcpy(p, data, sizeof(c->u)),
218 sha512_block_data_order(c, p, 1),
219 len -= sizeof(c->u), data += sizeof(c->u);
220 else
474e469b 221#endif
0f113f3e
MC
222 sha512_block_data_order(c, data, len / sizeof(c->u)),
223 data += len, len %= sizeof(c->u), data -= len;
224 }
225
226 if (len != 0)
227 memcpy(p, data, len), c->num = (int)len;
228
229 return 1;
230}
231
232int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
233{
234 return SHA512_Update(c, data, len);
235}
236
237void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
238{
474e469b 239#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
0f113f3e
MC
240 if ((size_t)data % sizeof(c->u.d[0]) != 0)
241 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
474e469b 242#endif
0f113f3e
MC
243 sha512_block_data_order(c, data, 1);
244}
c842261b
AP
245
246unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
0f113f3e
MC
247{
248 SHA512_CTX c;
249 static unsigned char m[SHA384_DIGEST_LENGTH];
250
251 if (md == NULL)
252 md = m;
253 SHA384_Init(&c);
254 SHA512_Update(&c, d, n);
255 SHA512_Final(md, &c);
256 OPENSSL_cleanse(&c, sizeof(c));
257 return (md);
258}
c842261b
AP
259
260unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
0f113f3e
MC
261{
262 SHA512_CTX c;
263 static unsigned char m[SHA512_DIGEST_LENGTH];
264
265 if (md == NULL)
266 md = m;
267 SHA512_Init(&c);
268 SHA512_Update(&c, d, n);
269 SHA512_Final(md, &c);
270 OPENSSL_cleanse(&c, sizeof(c));
271 return (md);
272}
273
474e469b 274#ifndef SHA512_ASM
c842261b 275static const SHA_LONG64 K512[80] = {
0f113f3e
MC
276 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
277 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
278 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
279 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
280 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
281 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
282 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
283 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
284 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
285 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
286 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
287 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
288 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
289 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
290 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
291 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
292 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
293 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
294 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
295 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
296 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
297 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
298 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
299 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
300 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
301 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
302 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
303 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
304 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
305 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
306 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
307 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
308 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
309 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
310 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
311 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
312 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
313 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
314 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
315 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
316};
317
474e469b
RS
# ifndef PEDANTIC
/*
 * Compiler- and CPU-specific versions of two primitives.  Either may be left
 * undefined here, in which case a portable definition is supplied afterwards:
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n;
 *   PULL64(x)  - load the 64-bit big-endian value stored at &x.
 */
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("rorq %1,%0"            \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret; })
#    if !defined(B_ENDIAN)
#     define PULL64(x)  ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
                           asm ("bswapq %0"             \
                                : "=r"(ret)             \
                                : "0"(ret)); ret; })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
     /* No bswap here: swap bytes with xchgb/roll on each 32-bit half. */
#     define PULL64(x)  ({ const unsigned int *p=(const unsigned int *)(&(x));\
                           unsigned int hi=p[0],lo=p[1]; \
                           asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                               "roll $16,%%eax; roll $16,%%edx; "\
                               "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                               : "=a"(lo),"=d"(hi)      \
                               : "0"(lo),"1"(hi) : "cc"); \
                           ((SHA_LONG64)hi)<<32|lo; })
#    else
#     define PULL64(x)  ({ const unsigned int *p=(const unsigned int *)(&(x));\
                           unsigned int hi=p[0],lo=p[1]; \
                           asm ("bswapl %0; bswapl %1;" \
                                : "=r"(lo),"=r"(hi)     \
                                : "0"(lo),"1"(hi));     \
                           ((SHA_LONG64)hi)<<32|lo; })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("rotrdi %0,%1,%2"       \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret; })
#   elif defined(__aarch64__)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("ror %0,%1,%2"          \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret; })
#    if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)  ({ SHA_LONG64 ret;              \
                           asm ("rev %0,%1"             \
                                : "=r"(ret)             \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)          /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)   _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(I386_ONLY)
/* Pointer arrives in ecx; the swapped halves are left in edx:eax. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
/* Pointer arrives in ecx; the swapped halves are left in edx:eax. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x)   __pull64be(&(x))
#    if _MSC_VER<=1200
#     pragma inline_depth(0)
#    endif
#   endif
#  endif
# endif
/*
 * Portable fallbacks and the SHA-512 logical functions.
 *
 * Every macro parameter is parenthesized in the expansion so that compound
 * arguments expand correctly: the previous ROTR(x,s) turned ROTR(v, 4 + 4)
 * into a left shift by (64-4 + 4).
 *
 *   B(x,j)     - byte j (0 = first in memory) of the object x, moved to its
 *                big-endian position in a 64-bit word;
 *   PULL64(x)  - the 64-bit big-endian value stored at &x, read bytewise so
 *                it works for any alignment and host byte order;
 *   ROTR(x,s)  - rotate the 64-bit unsigned x right by s, 0 < s < 64
 *                (s == 0 would shift by the full width, which is undefined).
 *
 * All of these may evaluate their arguments more than once.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)     (((x)>>(s)) | ((x)<<(64-(s))))
# endif
# define Sigma0(x)      (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)      (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)      (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)      (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)      (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)     (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
409# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
0f113f3e
MC
410/*
411 * This code should give better results on 32-bit CPU with less than
412 * ~24 registers, both size and performance wise...
413 */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
414 size_t num)
415{
416 const SHA_LONG64 *W = in;
417 SHA_LONG64 A, E, T;
418 SHA_LONG64 X[9 + 80], *F;
419 int i;
420
421 while (num--) {
422
423 F = X + 80;
424 A = ctx->h[0];
425 F[1] = ctx->h[1];
426 F[2] = ctx->h[2];
427 F[3] = ctx->h[3];
428 E = ctx->h[4];
429 F[5] = ctx->h[5];
430 F[6] = ctx->h[6];
431 F[7] = ctx->h[7];
432
433 for (i = 0; i < 16; i++, F--) {
474e469b 434# ifdef B_ENDIAN
0f113f3e 435 T = W[i];
474e469b 436# else
0f113f3e 437 T = PULL64(W[i]);
474e469b 438# endif
0f113f3e
MC
439 F[0] = A;
440 F[4] = E;
441 F[8] = T;
442 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
443 E = F[3] + T;
444 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
445 }
446
447 for (; i < 80; i++, F--) {
448 T = sigma0(F[8 + 16 - 1]);
449 T += sigma1(F[8 + 16 - 14]);
450 T += F[8 + 16] + F[8 + 16 - 9];
451
452 F[0] = A;
453 F[4] = E;
454 F[8] = T;
455 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
456 E = F[3] + T;
457 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
458 }
459
460 ctx->h[0] += A;
461 ctx->h[1] += F[1];
462 ctx->h[2] += F[2];
463 ctx->h[3] += F[3];
464 ctx->h[4] += E;
465 ctx->h[5] += F[5];
466 ctx->h[6] += F[6];
467 ctx->h[7] += F[7];
468
469 W += SHA_LBLOCK;
ce6aa1e4 470 }
0f113f3e
MC
471}
472
474e469b 473# elif defined(OPENSSL_SMALL_FOOTPRINT)
0f113f3e
MC
474static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
475 size_t num)
476{
477 const SHA_LONG64 *W = in;
478 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
479 SHA_LONG64 X[16];
480 int i;
481
482 while (num--) {
483
484 a = ctx->h[0];
485 b = ctx->h[1];
486 c = ctx->h[2];
487 d = ctx->h[3];
488 e = ctx->h[4];
489 f = ctx->h[5];
490 g = ctx->h[6];
491 h = ctx->h[7];
492
493 for (i = 0; i < 16; i++) {
474e469b 494# ifdef B_ENDIAN
0f113f3e 495 T1 = X[i] = W[i];
474e469b 496# else
0f113f3e 497 T1 = X[i] = PULL64(W[i]);
474e469b 498# endif
0f113f3e
MC
499 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
500 T2 = Sigma0(a) + Maj(a, b, c);
501 h = g;
502 g = f;
503 f = e;
504 e = d + T1;
505 d = c;
506 c = b;
507 b = a;
508 a = T1 + T2;
509 }
510
511 for (; i < 80; i++) {
512 s0 = X[(i + 1) & 0x0f];
513 s0 = sigma0(s0);
514 s1 = X[(i + 14) & 0x0f];
515 s1 = sigma1(s1);
516
517 T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
518 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
519 T2 = Sigma0(a) + Maj(a, b, c);
520 h = g;
521 g = f;
522 f = e;
523 e = d + T1;
524 d = c;
525 c = b;
526 b = a;
527 a = T1 + T2;
528 }
529
530 ctx->h[0] += a;
531 ctx->h[1] += b;
532 ctx->h[2] += c;
533 ctx->h[3] += d;
534 ctx->h[4] += e;
535 ctx->h[5] += f;
536 ctx->h[6] += g;
537 ctx->h[7] += h;
538
539 W += SHA_LBLOCK;
540 }
541}
542
474e469b
RS
543# else
/*
 * Round helpers for the unrolled transform.  Both rely on locals of the
 * calling function: T1 (which must hold the round's schedule word on entry
 * to ROUND_00_15) and, for ROUND_16_80, s0 and s1.
 *
 * ROUND_00_15 performs one round without moving the working variables:
 * it updates d and overwrites h, and the caller rotates the argument list
 * for the following round.
 */
# define ROUND_00_15(i,a,b,c,d,e,f,g,h) \
    do { \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
        h = Sigma0(a) + Maj(a,b,c); \
        d += T1; \
        h += T1; \
    } while (0)

/*
 * ROUND_16_80 first extends the 16-word schedule ring X in place for slot j,
 * then performs round i+j.
 */
# define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) \
    do { \
        s0 = X[(j+1)&0x0f]; \
        s0 = sigma0(s0); \
        s1 = X[(j+14)&0x0f]; \
        s1 = sigma1(s1); \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h); \
    } while (0)
553static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
554 size_t num)
555{
556 const SHA_LONG64 *W = in;
557 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
558 SHA_LONG64 X[16];
559 int i;
560
561 while (num--) {
562
563 a = ctx->h[0];
564 b = ctx->h[1];
565 c = ctx->h[2];
566 d = ctx->h[3];
567 e = ctx->h[4];
568 f = ctx->h[5];
569 g = ctx->h[6];
570 h = ctx->h[7];
571
474e469b 572# ifdef B_ENDIAN
0f113f3e
MC
573 T1 = X[0] = W[0];
574 ROUND_00_15(0, a, b, c, d, e, f, g, h);
575 T1 = X[1] = W[1];
576 ROUND_00_15(1, h, a, b, c, d, e, f, g);
577 T1 = X[2] = W[2];
578 ROUND_00_15(2, g, h, a, b, c, d, e, f);
579 T1 = X[3] = W[3];
580 ROUND_00_15(3, f, g, h, a, b, c, d, e);
581 T1 = X[4] = W[4];
582 ROUND_00_15(4, e, f, g, h, a, b, c, d);
583 T1 = X[5] = W[5];
584 ROUND_00_15(5, d, e, f, g, h, a, b, c);
585 T1 = X[6] = W[6];
586 ROUND_00_15(6, c, d, e, f, g, h, a, b);
587 T1 = X[7] = W[7];
588 ROUND_00_15(7, b, c, d, e, f, g, h, a);
589 T1 = X[8] = W[8];
590 ROUND_00_15(8, a, b, c, d, e, f, g, h);
591 T1 = X[9] = W[9];
592 ROUND_00_15(9, h, a, b, c, d, e, f, g);
593 T1 = X[10] = W[10];
594 ROUND_00_15(10, g, h, a, b, c, d, e, f);
595 T1 = X[11] = W[11];
596 ROUND_00_15(11, f, g, h, a, b, c, d, e);
597 T1 = X[12] = W[12];
598 ROUND_00_15(12, e, f, g, h, a, b, c, d);
599 T1 = X[13] = W[13];
600 ROUND_00_15(13, d, e, f, g, h, a, b, c);
601 T1 = X[14] = W[14];
602 ROUND_00_15(14, c, d, e, f, g, h, a, b);
603 T1 = X[15] = W[15];
604 ROUND_00_15(15, b, c, d, e, f, g, h, a);
474e469b 605# else
0f113f3e
MC
606 T1 = X[0] = PULL64(W[0]);
607 ROUND_00_15(0, a, b, c, d, e, f, g, h);
608 T1 = X[1] = PULL64(W[1]);
609 ROUND_00_15(1, h, a, b, c, d, e, f, g);
610 T1 = X[2] = PULL64(W[2]);
611 ROUND_00_15(2, g, h, a, b, c, d, e, f);
612 T1 = X[3] = PULL64(W[3]);
613 ROUND_00_15(3, f, g, h, a, b, c, d, e);
614 T1 = X[4] = PULL64(W[4]);
615 ROUND_00_15(4, e, f, g, h, a, b, c, d);
616 T1 = X[5] = PULL64(W[5]);
617 ROUND_00_15(5, d, e, f, g, h, a, b, c);
618 T1 = X[6] = PULL64(W[6]);
619 ROUND_00_15(6, c, d, e, f, g, h, a, b);
620 T1 = X[7] = PULL64(W[7]);
621 ROUND_00_15(7, b, c, d, e, f, g, h, a);
622 T1 = X[8] = PULL64(W[8]);
623 ROUND_00_15(8, a, b, c, d, e, f, g, h);
624 T1 = X[9] = PULL64(W[9]);
625 ROUND_00_15(9, h, a, b, c, d, e, f, g);
626 T1 = X[10] = PULL64(W[10]);
627 ROUND_00_15(10, g, h, a, b, c, d, e, f);
628 T1 = X[11] = PULL64(W[11]);
629 ROUND_00_15(11, f, g, h, a, b, c, d, e);
630 T1 = X[12] = PULL64(W[12]);
631 ROUND_00_15(12, e, f, g, h, a, b, c, d);
632 T1 = X[13] = PULL64(W[13]);
633 ROUND_00_15(13, d, e, f, g, h, a, b, c);
634 T1 = X[14] = PULL64(W[14]);
635 ROUND_00_15(14, c, d, e, f, g, h, a, b);
636 T1 = X[15] = PULL64(W[15]);
637 ROUND_00_15(15, b, c, d, e, f, g, h, a);
474e469b 638# endif
c842261b 639
0f113f3e
MC
640 for (i = 16; i < 80; i += 16) {
641 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
642 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
643 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
644 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
645 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
646 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
647 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
648 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
649 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
650 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
651 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
652 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
653 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
654 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
655 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
656 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
657 }
658
659 ctx->h[0] += a;
660 ctx->h[1] += b;
661 ctx->h[2] += c;
662 ctx->h[3] += d;
663 ctx->h[4] += e;
664 ctx->h[5] += f;
665 ctx->h[6] += g;
666 ctx->h[7] += h;
667
668 W += SHA_LBLOCK;
669 }
670}
ce6aa1e4 671
0f113f3e 672# endif
a5804a75 673
474e469b 674#endif /* SHA512_ASM */